library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
library(here)
here() starts at /Users/avrilwang/Desktop/Project-Plasmodium
library(deSolve)
library(crone)
library(optimParallel)
Loading required package: parallel
library(doParallel)
Loading required package: foreach
Loading required package: iterators
library(doRNG)
Loading required package: rngtools
library(arrow)

Attaching package: ‘arrow’

The following object is masked from ‘package:utils’:

    timestamp
library(stringr)
library(parallel)
library(ggpubr)
library(scales)

Notebook for plotting all of the figures for PloS Biology manuscript submission

Guidelines: taken from https://journals.plos.org/plosbiology/s/figures#loc-figure-file-requirements 1. format: eps 2. max file size: 10 MB 3. text size: Arial, Times, or Symbol font only in 8-12 point 2. figure size: Width: 789 – 2250 pixels (at 300 dpi). Height maximum: 2625 pixels (at 300 dpi).

#=========================================# figure 1: best single and co-infection cue #=========================================# Figure displaying the reaction norms of best single and co-infection.

#——- optimal cue reaction norm ———–# # read data

process data for reaction norm

# import labelling scheme
ez_label <- read.csv(here("data/ez_label.csv"))

# get si_label with ci cue range
si_ci_rug.df <- ci_rug.df %>% 
  mutate(label_si = case_when(
    label %in% c("I", "I1+I2") ~ "I",
    label %in% c("I log","I1+I2 log") ~ "I log",
    label %in% c("Ig", "Ig1+Ig2") ~ "Ig",
    label %in% c("Ig log") ~ "Ig log",
    label %in% c("sum", "I+Ig") ~ "I+Ig",
    label %in% c("sum log", "I+Ig log") ~ "I+Ig log",
    label == "R" ~ "R",
    label == "R log" ~ "R log",
    label %in% c("G", "G1+G2") ~ "G",
    label == "G log" ~ "G log"
  )) 

# get limit for si_rug
si_rug_lim.df <- si_rug.df %>% 
  filter(time <= 20) %>%
  group_by(label)%>% 
  summarise(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  select(label_si = label, min_si = min, max_si = max)

# filter to restriction conversion rate reaction norm range to cue ranges that appear in rug
## change to Inf/-inf to NA. Note that I am first joining with si rug lim to check which limit is larger, We will go with the cue range that has the largest span
ci_rug_lim.df <- si_ci_rug.df %>% 
  group_by(label) %>% 
  mutate(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  distinct(label, .keep_all = T) %>% 
  select(label, label_si, min, max)

rug_lim.final <- ci_rug_lim.df %>% left_join(si_rug_lim.df, by = "label_si") %>% 
  mutate(final_min = min(min, min_si),
         final_max = max(max, max_si))

# get second rug_lim.final for single infection
rug_lim.final2 <- rug_lim.final %>% 
  group_by(label_si) %>% 
  mutate(final_min = min(final_min, na.rm = T),
         final_max = max(final_max, na.rm = T))

# filter ci_rn by limit
ci_rn.df2 <- ci_rn.df %>% 
  left_join(rug_lim.final, by  = "label") %>% 
  group_by(label) %>% 
  filter(cue_range <= final_max & cue_range >= final_min) %>% 
  arrange(cue_range, .by_group = T) %>% 
  filter(row_number() %% 10 == 0) 
# import labelling scheme
ez_label <- read.csv(here("data/ez_label.csv"))

match single infection rn with coinfection

# get ci label to si rug and filter by limit
si_rn.df2 <- left_join(si_rn.df, select(ez_label, label_si, label_ci), by = c("label" = "label_si")) %>% 
  left_join(rug_lim.final, by  = c("label_ci" = "label")) %>% 
  group_by(label_ci) %>% 
  filter(cue_range <= final_max & cue_range >= final_min) %>% 
  arrange(cue_range, .by_group = T) %>% 
  filter(row_number() %% 10 == 0) %>% 
  select(cue_range, cr, label_ci, label_si)
Warning in left_join(si_rn.df, select(ez_label, label_si, label_ci), by = c(label = "label_si")) :
  Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
# get ci label to si rug, we will keep one unique value per label
si_rug.df2 <- select(si_rug.df, value, label_si = label) %>% distinct(value, label_si)

plot reaction norm

# join with ezlabel
ci_rn.df3 <- ci_rn.df2 %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rn.df3 <- si_rn.df2 %>% left_join(ez_label, by = "label_ci")
ci_rug.df3 <- ci_rug.df %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rug.df3 <- si_rug.df2 %>% left_join(ez_label, by = "label_si")
Warning in left_join(., ez_label, by = "label_si") :
  Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 27002 of `x` matches multiple rows in `y`.
ℹ Row 12 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
# redo order of cues
ci_rn.df3$ez_label <- factor(ci_rn.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10")) 

si_rn.df3$ez_label <- factor(si_rn.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))

ci_rug.df3$ez_label <- factor(ci_rug.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))

si_rug.df3$ez_label <- factor(si_rug.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))
# plot
ggplot() +
  geom_line(data = ci_rn.df3, aes(x = cue_range, y = cr, color = "Co-infection")) +
  geom_point(data = ci_rn.df3 %>% 
    group_by(label) %>% 
    mutate(ten_th = round(n()/10)) %>% 
    filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Co-infection", shape = "Co-infection"), size = 2) +
  geom_line(data = si_rn.df3, aes(x = cue_range, y = cr, color = "Single infection")) +
  geom_point(data = si_rn.df3 %>% 
    group_by(label_ci) %>% 
    mutate(ten_th = round(n()/10)) %>% 
    filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Single infection", shape = "Single infection"), size = 2) +
  geom_rug(data = ci_rug.df3, aes(x = value), color = "#4575b4", sides = "t", length = unit(0.1, "npc")) +
  geom_rug(data = si_rug.df3, aes(x = value), color = "#fc8d59", sides = "b", length = unit(0.1, "npc")) +
  facet_wrap(~ez_label, scales = "free_x", ncol = 2) +
  ylim(-0.3, 1.3) +
  theme_bw() +
  labs(y = "Conversion rate", x = "Cue range", color = "Model", shape = "Model") +
  scale_x_continuous(labels = function(x) format(x, scientific = T),
                     guide = guide_axis(check.overlap = TRUE)) + 
                    theme(axis.text.x = element_text(size = 7),
                          legend.position = "right")  +
  scale_color_manual(values=c( "#4575b4", "#fc8d59")) +
  theme(strip.text.x = element_text(margin = margin(b = 0.5, t = 0.5)))

ggsave(units = "px", dpi = 300, width = 2000, height = 2500, filename = here("figures/plos-bio/reaction_norm.tiff"), bg = "white", scale = 1.1)

#=========================================# Plotting single and co-infection fitness scatter plot #=========================================# # import in data

# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 

# co-infection dynamics
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

ez_label <- read.csv(here("data/ez_label.csv"))

process for final 20 days fitness

# get single infection maximum tau_cum for 20 days
si_fitness.df <- si_dyn.df %>% 
  filter(variable == "tau_cum" & time == 20)

# get co-infection maximum tau_cum for 20 days
ci_fitness.df <- ci_dyn.df %>% 
  filter(variable == "tau_cum1" & time == 20)

# join together two dataframes and add labels
si_ci_fitness.df <- select(ci_fitness.df, fitness_ci = value, label_ci = label) %>% 
  left_join(ez_label, by = "label_ci") %>% 
  left_join(select(si_fitness.df, fitness_si = value, id_si = id), by = "id_si") %>% 
  select(ez_label_si, ez_label, fitness_si, fitness_ci) %>% 
  rbind(data.frame( # add time
    ez_label_si = "Time", 
    ez_label = "Time",
    fitness_si =  9.787899,
    fitness_ci  = 2.311841
  ))

si_ci_fitness.df
# join together two dataframes and add labels
si_ci_fitness.df <- select(ci_fitness.df, fitness_ci = value, label_ci = label) %>% 
  left_join(ez_label, by = "label_ci") %>% 
  left_join(select(si_fitness.df, fitness_si = value, id_si = id), by = "id_si") %>% 
  select(ez_label_si, ez_label, fitness_si, fitness_ci) %>% 
  rbind(data.frame( # add time
    ez_label_si = "Time", 
    ez_label = "Time",
    fitness_si =  9.787899,
    fitness_ci  = 2.311841
  ))

plot scatter point of single infection vs co-infection

si_ci_fitness.pl <- ggplot() +
  geom_point(data = si_ci_fitness.df, aes(x = fitness_si, y = fitness_ci, color = ez_label_si, shape = ez_label_si), size = 3.5) +
  ggrepel::geom_label_repel(data = si_ci_fitness.df, aes(label = ez_label, x = fitness_si, y = fitness_ci),
                            fill = "white",xlim = c(-Inf, Inf), ylim = c(NA, NA)) +
  labs(x = "Maximum single infection fitness", y = "Maximum Co-infection fitness (per strain)",
       color = "Single infection cue", shape = "Single infection cue") +
  scale_shape_manual(values = 15:25) +
  lims(x = c(8, 10.3)) +
  theme_bw()

#=========================================================# # time series conversion rate for single and co-infection #=========================================================# #———get “ideal” single infection and co-infection dynamics———# This is when stuff are optimized based on time

source(here("functions/chabaudi_si_clean.R"))
source(here("functions/chabaudi_ci_clean.R"))

# single infection dynamic with time as cue. Optimized using local optimizer. Note that the time variable in dual cue is slightly different with higher flexibility. While that increases the fitness value by ~0.1, the overall conversion rate dynamic does not change that much
si_t.df <- chabaudi_si_clean(
  parameters_cr = c(4.55386, -13.0056, 4.15466, -11.9424),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, 20, by = 1e-3),
  cue = "t",
  solver = "vode",
  dyn = T)

# co-infection dynamic with time as cue
ci_t.df <- chabaudi_ci_clean(parameters_cr_1 = c(26.16425,  -71.07799,  53.34121,   -166.25693),
                            parameters_cr_2 = c(26.16425,   -71.07799,  53.34121,   -166.25693),
                            immunity = "tsukushi",
                            parameters = parameters_tsukushi,
                            time_range = time_range, 
                            cue_1 =  "t",
                            cue_2= "t",
                            cue_range_1 = time_range, # cue range of strain 1
                            cue_range_2 = time_range, # cue range of strain 2
                            log_cue_1 = "none", # whether to log transform cue 1
                            log_cue_2 = "none", # whether to log transform cue 2
                            solver = "vode", # solver for numerical integration. Vode often gives faster runs
                              dyn = T)

# get only conversion rate and make same format as si_cr.df2/ci_cr.df2
si_t.df %>% filter(time == 20 & variable == "tau_cum") ## fitness value of time as cue in single infection
si_t.cr <- si_t.df %>% 
  filter(variable == "cr") %>% 
  mutate(ez_label_si = "Time", 
         fitness_si =9.787899) %>% 
  select(time, value, fitness_si, ez_label_si)


## get fitness value of co-infection time
ci_t.df %>% filter(variable == "tau_cum1") %>% summarize(max = max(value, na.rm = T))
ci_t.cr <- ci_t.df %>% 
  filter(variable == "cr_1") %>% 
  mutate(ez_label = "Time", 
         fitness_ci = 2.311841) %>%
  select(time, value, fitness_ci, ez_label)

#——— single infection conversion rate heat map————–# # process info for single infection

—————–co-infection conversion rate heatmap———–

plot co-infeciton convesion rate heatmap

ggplot() +
  geom_raster(data = ci_cr.df2, 
              aes(x = time, y = forcats::fct_reorder(ez_label, fitness_ci), fill = value)) +
  labs(x = "Time (days)", y = "Co-infection cues", fill = "Conversion rate") +
  viridis::scale_fill_viridis(limits = c(0, 1), , oob = scales::squish) +
  xlim(1, 20) +
  theme_bw()
Warning: Removed 17028 rows containing missing values (`geom_raster()`).

#——— assemble final figure ————–#

# combine conversion rate dynamic of single infection and co-infection
cr.pl <- ggarrange(si_cr.pl, ci_cr.pl, labels = c("B", "C"), ncol = 2, common.legend = T, align = "h")

# combine with fitness scatterplot
ggarrange(si_ci_fitness.pl, cr.pl, ncol = 1, labels = c("A", ""), align = "hv")

# save
ggsave(units = "px", dpi = 300, width = 2000, height = 2000, filename = here("figures/plos-bio/fitness_cr-dyn.tiff"), bg = "white", scale = 1.35)

#===========================================================# # Demographic stochasticity #===========================================================# #———- plot heat map—————# # import in all fitness files

file_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv", full.names = T)
name_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv")
name_ls <- gsub("*.csv", "", name_ls)

# 60, which is about right
length(file_ls)
[1] 60
# read in files
fitness.ls <- lapply(file_ls, read.csv)
Warning: stack imbalance in 'lapply', 6 then 8
Warning: stack imbalance in '<-', 2 then 4
# assign unique ID
fitness.ls <- mapply(cbind, fitness.ls, "ID" = name_ls, SIMPLIFY = F)

process data

# get metainfo from ID
fitness.ls2 <- mclapply(fitness.ls, function(x){
  id_col <- x$ID
  # string split to extract all info
  cue <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
  log <- unlist(str_split(unique(id_col), pattern = "_"))[[4]]
  rand_var <- unlist(str_split(unique(id_col), pattern = "_"))[[5]]
  
  # get mean
  mean_fitness <- mean(x$max_fitness)
  # get sd
  sd_fitness <- sd(x$max_fitness)
  
  # bind results
  res <- cbind(x, cue= cue, log = log, rand_var = rand_var, mean_fitness = mean_fitness, sd_fitness = sd_fitness)
  return(res)
})

Get reference data

reference_ls <- list.files(path = here("data/MC2"), pattern = "*.csv", full.names = T)
reference_name.ls <- gsub("*.csv", "", list.files(path = here("data/MC2/"), pattern = "*.csv"))

# read in the files
reference.ls <- lapply(reference_ls, read.csv)

# assign unique ID
reference.ls <- mapply(cbind, reference.ls, "ID" = reference_name.ls, SIMPLIFY = F)

# get meta data
reference.ls2 <- mclapply(reference.ls, function(x){
  id_col <- x$ID
  # string split to extract all info
  cue <- unlist(str_split(unique(id_col), pattern = "_"))[[2]]
  # get log
  third_col <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
  log <- ifelse(third_col == "log", "log10", "none")
  
  # get mean
  mean_fitness <- mean(x$max_fitness)
  
  # get sd
  sd_fitness <- sd(x$max_fitness)
  
  # bind results
  res <- cbind(x, cue= cue, log = log, rand_var = "all", ref_mean_fitness = mean_fitness, ref_sd_fitness = sd_fitness)
  return(res)
})

combine MC partitioned and reference df

# get unique column values for each cue, log, and rand_var combo
fitness.ls3 <- do.call(rbind, fitness.ls2)
fitness.ls3 <- fitness.ls3 %>% dplyr::distinct(ID, .keep_all = T)

# repeat with reference
reference.ls3 <- do.call(rbind, reference.ls2)
reference.ls3 <- reference.ls3 %>% dplyr::distinct(ID, .keep_all = T)

# combine!
ref_fit.df <- left_join(fitness.ls3, reference.ls3, by = c("cue" = "cue", "log"= "log"))

compute proportion fitness and variation

ref_fit.df2 <- ref_fit.df %>% 
  mutate(p_sd = sd_fitness/ref_sd_fitness,
         p_mean = ref_mean_fitness/mean_fitness,
         cue_log = paste0(cue, "_", log),
         label = case_when(
           cue == "G" ~ "Gametocyte",
           cue == "I" ~ "Asexual iRBC",
           cue == "I+Ig" ~ "Asexual&sexual\niRBC",
           cue == "Ig" ~ "Sexual iRBC",
           cue == "R" ~ "RBC"
           ),
         parameter = case_when(
           rand_var.x == "rho" ~ "ρ",
           rand_var.x == "phin" ~ "ϕn",
           rand_var.x == "phiw"~ "ϕw",
           rand_var.x == "psin" ~ "ψn",
           rand_var.x == "psiw" ~ "ψw",
           rand_var.x == "beta" ~ "β"
         ))

plot!

# variation
mc_b <- ggplot() +
  geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_sd)) +
  facet_wrap(~log) +
  theme_bw() +
  viridis::scale_fill_viridis() +
  labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(sd("1 parameter randomized"), sd("all parameters randomized")))) +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 45, hjust=1))

# mean fitness
mc_c <- ggplot() +
  geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_mean)) +
  facet_wrap(~log) +
  theme_bw() +
  viridis::scale_fill_viridis() +
  labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(Mean("all parameters randomized"), Mean("1 parameter randomized")))) +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 45, hjust=1))

mc_partition <- ggarrange(mc_b, mc_c, ncol = 1)

#————– get violine plot of variation in fitness ——————–# # read MC data

# read in dymamics
mc_G_log.dyn <- read_parquet(here("data/MC2/mc_G_log_dyn.parquet"))
mc_G.dyn <- read_parquet(here("data/MC2/mc_G_dyn.parquet"))
mc_R_log.dyn <- read_parquet(here("data/MC2/mc_R_log_dyn.parquet"))
mc_R.dyn <- read_parquet(here("data/MC2/mc_R_dyn.parquet"))
mc_I_log.dyn <- read_parquet(here("data/MC2/mc_I_log_dyn.parquet"))
mc_I.dyn <- read_parquet(here("data/MC2/mc_I_dyn.parquet"))
mc_Ig_log.dyn <- read_parquet(here("data/MC2/mc_Ig_log_dyn.parquet"))
mc_Ig.dyn <- read_parquet(here("data/MC2/mc_Ig_dyn.parquet"))
mc_I_Ig_log.dyn <- read_parquet(here("data/MC2/mc_I+Ig_log_dyn.parquet"))
mc_I_Ig.dyn <- read_parquet(here("data/MC2/mc_I+Ig_dyn.parquet"))

# read in fitness
mc_G_log.fitness <- read.csv(here("data/MC2/mc_G_log_fitness.csv"))
mc_G.fitness <- read.csv(here("data/MC2/mc_G_fitness.csv"))
mc_R_log.fitness <- read.csv(here("data/MC2/mc_R_log_fitness.csv"))
mc_R.fitness <- read.csv(here("data/MC2/mc_R_fitness.csv"))
mc_I_log.fitness <- read.csv(here("data/MC2/mc_I_log_fitness.csv"))
mc_I.fitness <- read.csv(here("data/MC2/mc_I_fitness.csv"))
mc_Ig_log.fitness <- read.csv(here("data/MC2/mc_Ig_log_fitness.csv"))
mc_Ig.fitness <- read.csv(here("data/MC2/mc_Ig_fitness.csv"))
mc_I_Ig_log.fitness <- read.csv(here("data/MC2/mc_I+Ig_log_fitness.csv"))
mc_I_Ig.fitness <- read.csv(here("data/MC2/mc_I+Ig_fitness.csv"))

examine variation

# plot fitness vs iteration
fitness.df <- rbind(
  cbind(mc_G_log.fitness, id = "Gametocyte\nlog10"),
  cbind(mc_G.fitness, id = "Gametocyte"),
  cbind(mc_R_log.fitness, id = "RBC log10"),
  cbind(mc_R.fitness, id = "RBC"),
  cbind(mc_I_log.fitness, id = "Asexual iRBC\nlog10"),
  cbind(mc_I.fitness, id = "Asexual iRBC"),
  cbind(mc_Ig_log.fitness, id = "Sexual iRBC\nlog10"),
  cbind(mc_Ig.fitness, id = "Sexual iRBC"),
  cbind(mc_I_Ig_log.fitness, id = "Asexual&sexual iRBC\nlog10"),
  cbind(mc_I_Ig.fitness, id = "Asexual&sexual\niRBC")
)

# quantify variance and mean
fitness_var.df <- fitness.df %>% 
  dplyr::group_by(id) %>% 
  dplyr::summarise(median = median(max_fitness)) %>% 
  dplyr::mutate(id = forcats::fct_reorder(id, median))

plot violin with difference in deterministic model fitness and mean model fitness

# get deterministic df
det.df <- data.frame(fitness_var.df, `Maximum fitness` =  c(8.49777, 9.494991,8.854682,9.573291,8.58856,9.561373,8.23991,8.181604,8.569285,9.618812)) %>% 
  dplyr::mutate(id = forcats::fct_reorder(id, median)) %>% 
  tidyr::pivot_longer(-id) %>% 
  mutate(classification = case_when(
    name == "Maximum.fitness" ~"Optimal single infection",
    name == "median" ~"Median Monte Carlo",
    name == "mean" ~ "Mean Monte Carlo"))

mc_a <- ggplot() +
  geom_point(data = det.df, aes(y = id, x = value, shape = classification, color = classification), size = 3, alpha = 0.8) +
  geom_violin(data = fitness.df, aes(y = id, x = max_fitness), fill = "transparent") +
  labs(x = "Fitness", y = "Cue", color = "Fitness", shape = "Fitness") +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_color_manual(values=c("black", "#4575b4", "#fc8d59"))

#————– plot together———————–#

# arrange the heat map
ggarrange(mc_a, mc_partition, ncol = 2, nrow = 1, labels = c("A", "B"), align = "h")
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.

#——————————————–# # dual cue optimization figure #——————————————–#

source(here("functions/chabaudi_si_clean_high.R"))
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/par_to_hm_te.R"))

#———- plotting fitness of dual vs single cue opt ———# # import in previous data

plot

#———– time series conversion rate ————-# # dynamics simulation of high parameter cues (these serve as reference points)

plot

ggplot() +
  geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
  geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 3) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
  xlim(0, 20) +
  scale_color_manual(values = c("#fc8d59","#fdcb44","black", "#4575b4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))
Warning: Removed 3 rows containing missing values (`geom_line()`).
Warning: Removed 3 rows containing missing values (`geom_point()`).

#———— reaction norm heatmap of R log10 + I log10 ————# # process data

max(R_Ig.dyn$Ig) 
[1] 175754.2

plot

# just testing for sexual iRBC vs RBC
ggplot() +
  geom_path(data = R_Ig.dyn, aes(x = Ig, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_Ig.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = Ig, y = log_R), color = "white") +
  scale_x_continuous(trans = "log10") +
  xlim(0, 200000) +
  labs(y = "RBC log10", x = "Sexual iRBC log10", fill = "Conversion rate") +
  theme_dark()
Scale for x is already present.
Adding another scale for x, which will replace the existing scale.

save figure for poster

dual_rn.pl2 <- ggplot() +
  geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
  scale_fill_viridis_c() +
  geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
  xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
  ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
  labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion rate") +
  theme_dark() + theme(legend.position="top") 

dual_cr.pl2 <- ggplot() +
  geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
  geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 2) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
  xlim(0, 20) +
  scale_color_manual(values = c("#4575b4", "#91bfdb","#fc8d59","#fdcb44")) +
  theme_bw() +
  theme(legend.position="top") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))

ggarrange(dual_cr.pl2, dual_rn.pl2, align = "h", widths = c(1.25, 1))
ggsave(here("poster/dual_cue.png"), width = 7, height = 4)

#——– assemble final figure ————-#

# assemble panel B and C
dual_pl.BC <- ggarrange(dual_cr.pl, dual_rn.pl, align = "v", ncol = 1, labels = c("B", "C"))

# assemble panel A
ggarrange(dual_si_fitness.pl, dual_pl.BC, ncol = 2, labels = c("A", ""), widths = c(1, 0.75))
ggsave(here("figures/plos-bio/dual_cue.tiff"), units = "px", width = 2250, height = 1400, scale = 1.5, dpi=300,  bg = "white")

#============================================# # dynamics of dual cue simulation (all combinations) #============================================# # conversion rate

ggsave(here("figures/plos-bio/dual_cue_cr.tiff"), units = "px", width = 2000, height = 1500, dpi=300,  bg = "white")
Warning: Removed 26448 rows containing missing values (`geom_raster()`).

cumultative transmission transmission potential

Proving a point to myself

# it seems that R log+I log only inches ahead of other at the very end of the infection, suggesting that terminal investment is at play
ggplot(data = dual_tau.df_p, aes(x = time, y = value, color = label_plot)) +
  geom_line() +
  scale_fill_viridis_c() +
  xlim(15, 20) +
  ylim(5,10) +
  labs(x = "Time (days)", y = "Dual cue combination", fill = "Conversion rate") +
  theme_bw()
Warning: Removed 12500 rows containing missing values (`geom_line()`).

#============================================# # get dual cue disease map (simulated) #============================================# I am going to take the optimal dynamic (R log10 + I log10) and plot all possible disease curves and see if any follow the hysteresis curve. See below for real life disease curve.

process

plot

ggarrange(plotlist = dual_curve.pl, ncol = 2, nrow = 3, align = "hv")
Warning: Removed 1 row containing missing values (`geom_path()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave(here("figures/plos-bio/dual_curve1.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")


ggarrange(plotlist = dual_curve_xlog.pl, ncol = 2, nrow = 3, align = "hv")
Warning: Removed 1 row containing missing values (`geom_path()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave(here("figures/plos-bio/dual_curve2.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")


ggarrange(plotlist = dual_curve_ylog.pl, ncol = 2, nrow = 3, align = "hv")
Warning: Removed 1 row containing missing values (`geom_path()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave(here("figures/plos-bio/dual_curve3.tiff"), units = "px", width = 1000, height =1000, scale = 1.5, dpi=300,  bg = "white")


ggarrange(plotlist = dual_curve_log.pl, ncol = 2, nrow = 3, align = "hv")
Warning: Removed 1 row containing missing values (`geom_path()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave(here("figures/plos-bio/dual_curve4.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")

#============================================# # real life disease curve #============================================# # execute code from report 10 to get final dataset The following graphs will be made: - R vs iRBC - R log10 vs iRBC - R vs iRBC log10 - R log10 vs iRBC log10

R on y-axis

r_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = asex)) +
  geom_path(aes(y = RBC, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = asex)) +
  geom_path(aes(y = log10(RBC), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

r_ilog.dc <-ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = log10(asex))) +
  geom_path(aes(y = RBC, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = log10(asex))) +
  geom_path(aes(y = log10(RBC), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

G on y-axis

g_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = gam, x = asex)) +
  geom_path(aes(y = gam, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

glog_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(gam), x = asex)) +
  geom_path(aes(y = log10(gam), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

g_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = gam, x = log10(asex))) +
  geom_path(aes(y = gam, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

glog_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(gam), x = log10(asex))) +
  geom_path(aes(y = log10(gam), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

R vs G

r_g.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = gam)) +
  geom_path(aes(y = RBC, x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "Gametocyte per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_g.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = gam)) +
  geom_path(aes(y = log10(RBC), x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "Gametocyte per µL", y = "log10(RBC per µL)", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

r_glog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = log10(gam))) +
  geom_path(aes(y = RBC, x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(Gametocyte) per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_glog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = log10(gam))) +
  geom_path(aes(y = log10(RBC), x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(Gametocyte) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

plot together

#===========================================# # static competition #===========================================# #——- heat map —————# # calculate fitness difference for 20 days

static.ls <- lapply(static.ls, read_parquet)
Error: file must be a "InputStream"

import and process data

plot

ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
Warning: Graphs cannot be vertically aligned unless the axis parameter is set. Placing graphs unaligned.

ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
Warning: Graphs cannot be vertically aligned unless the axis parameter is set. Placing graphs unaligned.
ggsave(here("figures/plos-bio/static_competition_a.tiff"), units = "px", width = 2250, height = 1500, scale = 1.2, dpi=300,  bg = "white")

#—— effect cue perception ——-# ## logging

# get non-logged pairings
static_nolog <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")

static_log <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

static_log.df <- left_join(
  select(static_nolog, cue_1, label_ci_2, log_1, None = fitness_difference),
  select(static_log, cue_1, label_ci_2, log_1, Log = fitness_difference),
  by = c("cue_1", "label_ci_2")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))

combined

static_nocomb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

static_comb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
static_comb.df <- left_join(
  select(static_nocomb, cue_1, label_ci_2, log_1, Self = fitness_difference),
  select(static_comb, cue_1, label_ci_2, log_1, Total = fitness_difference),
  by = c("cue_1", "log_1", "label_ci_2")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))

plot

ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")
ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")

ggsave(here("figures/plos-bio/static_competition_b.tiff"), units = "px", width = 1000, height = 2000, scale = 1.2, dpi=300,  bg = "white")

#===========================================# # invasion analysis #===========================================# # import in data (already 20 days )

invade.df <- read.csv(here("data/ci_invasion.csv"))

process data for invasion matrix

invade.mat <- invade.df %>% 
  group_by(V1 = pmin(mut_id, res_id), V2 = pmax(mut_id, res_id)) %>% # group by cue competition, irregardless of order
  mutate(id_alt = paste0(V1, V2),
         invade = case_when(
           fitness > 0 ~ "invade",
           fitness < 0 ~ "not invade"
         )) %>% 
  group_by(id_alt) %>% 
  mutate(
    mut_is_V1 = case_when(
    mut_id == V1 ~ "V1_invade",
    mut_id != V1 ~ "V1_invaded")) %>% 
  arrange(id_alt) %>% 
  select(fitness, V1, V2, id_alt, invade, mut_is_V1) %>% 
  tidyr::pivot_wider(names_from = mut_is_V1, values_from = fitness) %>% 
  group_by(id_alt) %>% 
  mutate(V1_invade2 = gsub("NA", "", paste0(V1_invade, collapse = "")),
         V1_invaded2 = gsub("NA", "", paste0(V1_invaded, collapse = ""))) %>% 
  distinct(id_alt, .keep_all = T) %>% 
  mutate(
    category = case_when(
    V1_invade2 > 0 & V1_invaded2 > 0 ~ "Mutual invasion",
    V1_invade2 > 0 & V1_invaded2 < 0 ~ "Only strain 1 invasion",
    V1_invade2 < 0 & V1_invaded2 > 0 ~ "Only strain 2 invasion",
    V1_invade2 < 0 & V1_invaded2 < 0 ~ "Mutual non-invasion"
  )) %>% 
  select(V1, V2, invasion = category)

invade.df %>% filter(mut_id == "G-i_none")
invade.df %>% filter(res_id == "G-i_none")
invade.mat4 <- rbind(
  select(invade.mat3, V1_label, V2_label, invasion),
  select(invade.mat3, V2_label = V1_label, V1_label = V2_label) %>% mutate(invasion = NA)) %>%
  mutate(
    invasion_2 = case_when(
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
    invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
  )) %>% 
  filter(!is.na(V1_label))
Adding missing grouping variables: `id_alt`
Adding missing grouping variables: `id_alt`

plot invasion matrix

create summary bar chart

# create a stacked barchart for summary
## filter out na
invade.matalt <- invade.mat3 %>% na.exclude()

# get frquency from both sides. Note when grouping for V2, from the perspective of cue 2, scenarrio when strain 2 invade = strain 1 invade
invade.matalt1 <- invade.matalt %>% group_by(V1_label, invasion) %>% 
  summarize(frequency_1 = n())

invade.matalt2 <- invade.matalt %>%
  mutate(invasion_alt = case_when(
    invasion == "Only strain 1 invasion" ~ "Only strain 2 invasion",
    invasion == "Only strain 2 invasion" ~ "Only strain 1 invasion",
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Mutual non-invasion" ~ "Mutual non-invasion"
  )) %>% 
  group_by(V2_label, invasion_alt) %>% 
  summarize(frequency_2 = n())     

# full join and sum. has confirmed all of them add up to 14 
invade.matalt3 <- full_join(invade.matalt1, invade.matalt2, by = c("V1_label" = "V2_label", "invasion" = "invasion_alt"))

invade.matalt3[is.na(invade.matalt3)] <- 0
invade.matalt4 <- invade.matalt3 %>% 
  mutate(freq = frequency_1 + frequency_2) %>% 
  mutate(temp = case_when(
    invasion == "Only strain 1 invasion" ~ freq
  )) %>% 
  group_by(V1_label) %>% 
  mutate(invade_1_freq = max(temp, na.rm = T)) %>% 
  mutate(invasion_2 = case_when(
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
    invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
  ))

plot together

ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))

ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggsave(here("figures/plos-bio/invasion_a.tiff"), units = "px", width = 2250, height = 1100, scale = 1.4, dpi=300,  bg = "white")

#—————- invasion pairwise comparison—————–# ## proces data

log

combined

invade_nocomb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

invade_comb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
invade_comb.df <- left_join(
  select(invade_nocomb, cue_1, res_id, log_1, Self = fitness),
  select(invade_comb, cue_1, res_id, log_1, Total = fitness),
  by = c("cue_1", "log_1", "res_id")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
invade_comb.df

plot

ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)

ggsave(here("figures/plos-bio/invasion_b.tiff"), units = "px", width = 2250, height = 850, scale = 1.2, dpi=300,  bg = "white")

#===========================================# # Cue performance across single, co-infection, static, and invasion #===========================================#

plot

#-=====================# # Partitioning best cue #=====================-# #——- single infection ———–# # redo some optimization (lower fitness in no R than default)

source(here("functions/chabaudi_si_clean_R.R"))
source(here("functions/chabaudi_si_clean_N.R"))
# I none
cl <- makeCluster(detectCores()); setDefaultCluster(cl = cl)
I_no_R <- optimParallel(
    par = rep(0.5,4), # start at 0.5x4
    fn = chabaudi_si_clean_R, 
    control = list(trace = 6, fnscale = -1),
    immunity = "tsukushi",
    parameters = parameters_tsukushi,
    time_range = seq(0, 20, by = 1e-3),
    cue_range =  seq(0, 6*(10^6), by = (6*(10^6))/5000),
    cue = "I",
    log_cue = "none",
    solver = "vode")
stopCluster(cl)
# 0.144021 -43.1046 2030.27 -524.686 
# 8.69589

import and process data

# import in data
si_partition.ls <- list.files(path = here("data/partition/si/"), pattern = "*.csv", full.names = T)
si_partition.ls <- lapply(si_partition.ls, read.csv)
si_partition.df <- do.call(rbind, si_partition.ls)

# combine with si fitness (default)
si_partition.df <- si_partition.df %>% left_join(select(si_fitness.df, id, fitness = value), by = "id")

# make longer
si_partition.df2 <- tidyr::pivot_longer(si_partition.df, c(fitness_R, fitness_N, fitness_W, fitness))

# calculate coefficient of variation. Also rename
si_partition.df2 <- si_partition.df2 %>% 
  group_by(name) %>% 
  mutate(cv = sd(value)/mean(value)*100,
         mean = mean(value),
         category = case_when(
           name == "fitness_R" ~ "No RBC limitation",
           name == "fitness_W" ~ "No targeted immunity",
           name == "fitness_N" ~ "No indiscriminate\nimmunity",
           name == "fitness" ~ "Default"
         ))

plot

library(ungeviz)
# raw fitness
si_partition.pl1 <- ggplot() +
  geom_vpline(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = mean, group = category, color = category), show.legend = F, size = 1) +
  geom_point(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value), size = 2, alpha = 0.7) +
  geom_line(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value, group = id), alpha = 0.2) +
  labs(x = "Fitness", y = "Conditions") +
  theme_bw()

# coefficient of variation
si_partition.pl2 <- ggplot() +
  geom_bar(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = cv), stat = "identity") +
  labs(x = "Coefficient of\nvariation (%)", y = "") +
  theme_bw() +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())

si_partition.pl <- ggarrange(si_partition.pl1, si_partition.pl2, widths = c(1, 0.3), align = "h")
si_partition.pl

ggsave(here("figures/plos-bio/partition_fitness.tiff"), width = 7, height = 4)

#——- consequences of no targeted immunity ————# # get dynamics of no targeted immunity

get_dyn <- function(df){
  
  source(here("functions/chabaudi_si_clean_W.R"))
  id <- df$id
  cue <- df$cue
  log <- df$log
  par <- c(df$var_W1, df$var_W2, df$var_W3, df$var_W4)
  cue_range <- seq(df$low, df$high, by = df$by)
  
  # get dynamics
  dyn <- chabaudi_si_clean_W(
    parameters_cr = par,
    immunity = "tsukushi",
    parameters = parameters_tsukushi,
    time_range = seq(0, 20, by = 1e-3),
    cue_range =  cue_range,
    cue = cue,
    log_cue = log,
    solver = "vode",
    dyn = T
  )
  
  # combine
  dyn2 <- cbind(dyn, id = id, cue = cue, log = log)
  
  write_parquet(dyn2, paste0(here("data/partition/si_dyn/"), id, "_noW_dyn.parquet"))
  
}

get df to run

# join with cue_range
cue_range_si.df <- read.csv(here("data/cue_range_si.csv"))
si_partition.df3 <- si_partition.df %>% left_join(select(cue_range_si.df, low, high, by, id), "id")

# lapply loop
si_partition.ls <- split(si_partition.df3, seq(nrow(si_partition.df3)))
mclapply(si_partition.ls, get_dyn)

process dataframe

# import in dataframe
no_W.ls <- list.files(here("data/partition/si_dyn/"), pattern = "*noW_dyn.parquet", full.names = T)
no_W.df <- lapply(no_W.ls, read_parquet)
no_W.df <- do.call(rbind, no_W.df)

# combine with ez label
ez_label <- read.csv(here("data/ez_label.csv"))
no_W.df <- left_join(no_W.df, ez_label, by = c("id" = "id_si"))

# get conversion rate 
no_W.cr <- no_W.df %>% filter(variable == "cr")
no_W.I <- no_W.df %>% filter(variable == "I")

# get default conversion rate dynamics
si_dyn.df <- left_join(si_dyn.df, ez_label, by = c("id" = "id_si"))
si_dyn.cr <- si_dyn.df %>% filter(variable == "cr")
si_dyn.I <- si_dyn.df %>% filter(variable == "I")

plot conversion rate

mechanism: targeted immunity led to lower parasite density in the initial stages, which prevents parasites from making the switch from no conversion rate to high conversion rate. When parsite density undergoes drastic increase at the beginning due to lower immunity, this presents a higher degree of signal that allows parasite to make the switch appropriately

partition_cr.pl <- ggplot() +
  geom_line(data = no_W.cr, aes(x = time, y= value, color = "No targeted immunity")) +
  geom_line(data = si_dyn.cr, aes(x = time, y= value, color = "Default")) +
  facet_wrap(~ez_label_si, ncol = 5) +
  xlim(0, 20) +
  geom_vline(xintercept = 7) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Condition") +
  theme_bw()

no_W.cr

#—– cue state ————–#

function to get cue states

# function to get cue states
get_cue_state <- function(df){
  cue <- trimws(gsub("_log|_none", "", unique(df$id)))
  if(cue != "I+Ig"){
  df2 <- df %>% filter(variable == cue)
  if(str_detect(unique(df$id), "log")){
    df2 <- df2 %>% 
      mutate(value = log10(value))
  }
  }
  
  if(cue == "I+Ig"){
    df2 <- df %>% filter(variable %in% c("I", "Ig")) %>% 
      group_by(time) %>% 
      mutate(value = sum(value))
    
    if(str_detect(unique(df$id), "log")){
    df2 <- df2 %>% 
      mutate(value = log10(value))
  }
  }
  
  df2$value[df2$value == -Inf] <- 0
  
  write_parquet(df2, paste0(here("data/partition/si_default_state/"), unique(df$id), "_noW_state.parquet"))
}

run function

# split dynamics based on id
no_W.split <- split(no_W.df, no_W.df$id)

# run function
mclapply(no_W.split, get_cue_state)

# get dataframe
no_W.state <- list.files(here("data/partition/si_state/"), pattern = "*.parquet", full.names = T)
no_W.state <- lapply(no_W.state, read_parquet)
no_W.state <- do.call(rbind, no_W.state)
no_W.state$value[no_W.state$value < 0] <- 0

# get same for si infection
default.split <- split(si_dyn.df, si_dyn.df$id)
mclapply(default.split, get_cue_state)
default.state <- list.files(here("data/partition/si_default_state/"), pattern = "*.parquet", full.names = T)
default.state <- lapply(default.state, read_parquet)
default.state <- do.call(rbind, default.state)
default.state$value[default.state$value < 0] <- 0

# manually correct non-logging
I_Ig.corr <- no_W.state %>% filter(id == "I+Ig_log") %>% 
  mutate(value = log10(value))
I_Ig.corr$value[I_Ig.corr$value < 0] <- 0

no_W.state2 <- no_W.state %>% filter(id != "I+Ig_log")
no_W.state2 <- no_W.state2 %>% rbind(no_W.state2, I_Ig.corr)

plot

absence of targeted immunity led to drastic increase in parasite density in early phases of infection. This produces high signal intensity for parasite and host-based cues, especially non-logged ones, which allows for state differentation. While this can be viewed as a modelling artifiact, it should be noted that the logged cues seldom changed as these changes in early infection did little to alter the actual early signal intensity sensed by the parasite. In an environment where there is heterogeneity in host response, and thus, signal, logging allows for parasites to adapt optimal strategy whereas non-logged cues must contend with sensitivity to immunity.

# function to individually plot stuff
plot_state <- function(df1, df2){
  
  # plot state dynamics
  state_pl <- ggplot() +
  geom_line(data = df1, aes(x = time, y = value, color = name, group = name)) +
  facet_wrap(~ez_label_si, scales = "free") +
  xlim(1,20) +
  theme_bw() +
  theme(legend.position="none") +
  labs(x = "", y = "Cue") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
  scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59"))
  
  # plot conversion rate dynamics
  cr_pl <- ggplot() +
  geom_raster(data = df2, aes(x = time, y = name, fill = value)) +
  xlim(1,20) +
  theme_bw() +
    labs(x = "Time (days)") +
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "none") +
    scale_fill_viridis_c(lim = c(0, 1))
  
  # arrange
  ggarrange(state_pl, cr_pl, ncol = 1, nrow = 2, align = "v", heights = c(1, 0.4))
  ggsave(paste0(here("figures/plos-bio/partition/"), unique(df1$id), ".tiff"), width = 4.5, height = 3.5)
}

split

# combine state
noW_default.state <- left_join(
  select(no_W.state2, time, `No targeted\nimmunity` = value, id, ez_label_si), 
  select(default.state %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))

noW_default.state2 <- tidyr::pivot_longer(noW_default.state, c(`No targeted\nimmunity`, `Default`))
# combine conversion raster
noW_default.cr <- left_join(
  select(no_W.cr, time, `No targeted\nimmunity` = value, id, ez_label_si), 
  select(si_dyn.cr %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))
noW_default.cr2 <- tidyr::pivot_longer(noW_default.cr, c(`No targeted\nimmunity`, `Default`))

# split
noW_default_state.ls <- split(noW_default.state2, noW_default.state2$id)
noW_default_cr.ls <- split(noW_default.cr2, noW_default.cr2$id)

# run function
mapply(plot_state, noW_default_state.ls, noW_default_cr.ls)

#——– reaction norms of default vs optimized ————# # get reaction norm and rug data

source(here("functions/par_to_df.R"))

# Gametocyte
g_log.rn <- par_to_df(par = c(1.211521, -3.936778,  -1.312944,  -1.285713), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))
g_log.rn2 <- par_to_df(par = c(1.393860539, -4.253007616,   -0.313947029,   -2.000857344), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))

g.rn <- par_to_df(par = c(0.04061288,   -9.31445958,    74.13015506,    -431.5984364), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))
g.rn2 <- par_to_df(par = c(0.541729073, -3.904616443,   0.87487412, -0.694177021), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))

# I+Ig
I_Ig_log.rn <- par_to_df(par = c(3.594042,  4.157744,   -13.530672, 2.599905), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))
I_Ig_log.rn2 <- par_to_df(par = c(63.71893822,  -87.77671601,   -56.55475514,   -66.02209549), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))

I_Ig.rn <- par_to_df(par = c(0.3159297, -46.1104558,    1250.752908,    -6.1982093), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))
I_Ig.rn2 <- par_to_df(par = c(0.731982784,  -21.69799449,   149.7841876,    17.02551769), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))

# convert log to non-logged scale
g_log.rn$cue_range <- 10^(g_log.rn$cue_range)
g_log.rn2$cue_range <- 10^(g_log.rn2$cue_range)
I_Ig_log.rn$cue_range <- 10^(I_Ig_log.rn$cue_range)
I_Ig_log.rn2$cue_range <- 10^(I_Ig_log.rn2$cue_range)

# get rug
g_log.rug <- default.state %>% 
  filter(label_si == "G log") %>% 
  mutate(value = 10^value) %>% 
  select(label_si, value)

g_log.rug2 <- no_W.state %>% 
  filter(label_si == "G log") %>% 
  mutate(value = 10^value) %>% 
  filter(value <= 6*(10^4)) %>% 
  select(label_si, value)

I_Ig_log.rug <- default.state %>% 
  filter(label_si == "I+Ig log") %>% 
  select(label_si, value)

I_Ig_log.rug2 <- no_W.state %>% 
  filter(label_si == "I+Ig log") %>% 
  select(label_si, value)

g.rug <- default.state %>% 
  filter(label_si == "G") %>% 
  select(label_si, value)

g.rug2 <- no_W.state %>% 
  filter(label_si == "G" & value <= 6*(10^4)) %>% 
  select(label_si, value)

I_Ig.rug <- default.state %>% 
  filter(label_si == "I+Ig") %>% 
  select(label_si, value)

I_Ig.rug2 <- no_W.state %>% 
  filter(label_si == "I+Ig") %>% 
  select(label_si, value)

# get rug limits
rug_lim <- rbind(g_log.rug,
                 g_log.rug2,
                 I_Ig_log.rug,
                 I_Ig_log.rug2,
                 g.rug,
                 g.rug2,
                 I_Ig.rug,
                 I_Ig.rug2) %>% 
  group_by(label_si) %>% 
  summarize(max = max(hablar::s(value), na.rm = T),
            min = min(hablar::s(value), na.rm = T))

# combine and filter
rn <- rbind(
  cbind(g_log.rn, label_si = "G log", condition = "Default"),
  cbind(g_log.rn2, label_si = "G log", condition = "No targeted\nimmunity"),
  cbind(g.rn, label_si = "G", condition = "Default"),
  cbind(g.rn2, label_si = "G", condition = "No targeted\nimmunity"),
  cbind(I_Ig_log.rn, label_si = "I+Ig log", condition = "Default"),
  cbind(I_Ig_log.rn2, label_si = "I+Ig log", condition = "No targeted\nimmunity"),
  cbind(I_Ig.rn, label_si = "I+Ig", condition = "Default"),
  cbind(I_Ig.rn2, label_si = "I+Ig", condition = "No targeted\nimmunity")
) %>% 
  left_join(rug_lim, by = "label_si") %>% 
  group_by(label_si) %>% 
  filter(cue_range <= max & cue_range >= min)

# combine rug
rug <- rbind(cbind(g_log.rug, condition = "Default"),
             cbind(g_log.rug2, condition = "No targeted\nimmunity"),
             cbind(g.rug, condition = "Default"),
             cbind(g.rug2, condition = "No targeted\nimmunity"),
             cbind(I_Ig_log.rug, condition = "Default"),
             cbind(I_Ig_log.rug2, condition = "No targeted\nimmunity"),
             cbind(I_Ig.rug, condition = "Default"),
             cbind(I_Ig.rug2, condition = "No targeted\nimmunity"))

# cobine with ezlabel
rn2 <- rn %>% left_join(ez_label, by = "label_si")
rug2 <- rug %>% left_join(ez_label, by = "label_si")

# filter rug
default.rug <- rug2 %>% filter(condition == "Default")
no.rug <- rug2 %>% filter(condition == "No targeted\nimmunity")

plot

ggplot() +
  geom_line(data = rn2, aes(x = cue_range, y = cr, color = condition)) +
  geom_rug(data = default.rug, aes(x = value, color = condition), sides = "b") +
  geom_rug(data = no.rug, aes(x = value, color = condition), sides = "t") +
  facet_wrap(~fct_relevel(ez_label_si, c("Gametocyte log10", "Gametocyte", "Asexual&sexual\niRBC log10", "Asexual&sexual iRBC")), scales = "free_x") +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE)) +
  theme_bw() +
  scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59")) +
  ylim(0, 1.1) +
  labs(x = "Cue range", y = "Conversion rate", color = "Condition")

ggsave(here("figures/plos-bio/partition_rn.tiff"), width = 7.5, height = 6)

get conversion rate legend

noW_default.cr %>% filter(id == "G_log") %>% 
ggplot() +
  geom_raster(aes(x = time, y = id, fill = Default)) +
  xlim(1,20) +
  theme_bw() +
    labs(x = "Time (days)",
         fill = "Conversion rate") +
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank(),) +
    scale_fill_viridis_c(lim = c(0, 1))

ggsave(here("figures/plos-bio/cr_legend.tiff"))

#================================# # Disease curves for single, co-infection, and invasion #===============================# # get data for disease curves

# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 

# co-infection dynamics (mon-cue)
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

# dual cue dynamics
dual_dyn.df <- read_parquet(here("data/dual_cue_dyn/dual_cue_dyn.parquet"))

#——- single cue comparison —————# # process data

# process dynamics -> turn skinny
si_dc.df <- si_dyn.df %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R") %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(total = I+Ig)
# process dynamics -> turn skinny
si_dc.df <- si_dyn.df %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R") %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(total = I+Ig)

plot

#———- co-infection monocue ————-#

# get relevent variables
ci_dc.df <- ci_dyn.df %>% 
  filter(variable == "I1" | variable == "Ig1" | variable == "R")

# morph into skinny format
ci_dc.df <- tidyr::pivot_wider(ci_dc.df, names_from = variable, values_from = value, id_cols = c(time, label)) %>% 
  mutate(total = I1+Ig1)

# good cue bad cue
ci_cue.dv <- ci_fitness.df %>% 
  mutate(classification = case_when(
    value > 2.25 ~ "High-performing",
    value <= 2.25 ~ "Poor-performing"
  ))

# join with classificaiton
ci_dc.df2 <- ci_dc.df %>% left_join(ci_cue.dv, by = "label")

# split into top erforming and poor-performing cues
ci_dc.high <- ci_dc.df2 %>% filter(classification == "High-performing")
ci_dc.poor <- ci_dc.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
ci_dc.high2 <- ci_dc.high %>% left_join(ez_label, by = c("label" = "label_ci"))

#write_parquet(ci_dc.high2, here("data/disease_curve/ci_dc_high.parquet"))
#write_parquet(ci_dc.poor, here("data/disease_curve/ci_dc_poor.parquet"))

plot

ci_dc.poor <- read_parquet(here("data/disease_curve/ci_dc_poor.parquet"))
ci_dc.high2 <- read_parquet(here("data/disease_curve/ci_dc_high.parquet"))

# plot
ci_dc.pre <- ggplot() +
  geom_path(data = ci_dc.poor, aes(x= total, y = R, group = label), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Co-infection\ngood performing cues", x = "Asexual & sexual iRBC per µL", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

ci_dc.pl <- ci_dc.pre +
  geom_point(data = ci_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 3) +
  geom_path(data = ci_dc.high2, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb")) +
  guides(color = guide_legend(override.aes = list(size = 0.1)))

#——— dual cue ————————–# # process data

plot

dual_dc.high <- read_parquet(here("data/disease_curve/dual_dc_high.parquet"))
dual_dc.poor <- read_parquet(here("data/disease_curve/dual_dc_poor.parquet"))

dual_dc.high2 <- dual_dc.high %>% 
  filter(label_alt == "R log10 + I log10")

# add 
dual_dc.pre <- ggplot() +
  geom_path(data = dual_dc.poor, aes(x= total, y = R, group = label_alt), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "High-performing\ndual cues per µL", x = "Asexual & sexual iRBC", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)


dual_dc.pl <- dual_dc.pre +
  geom_point(data = dual_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, shape = label_alt), color = "#4575b4", size = 3) +
  geom_path(data = dual_dc.high2, aes(x= total, y = R, group = label_alt), color = "#4575b4", size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme(legend.position = "none") +
  guides(color = guide_legend(override.aes = list(size = 0.1)))

#——— co-infection static —————-#

# import in dynamics data
static_dyn.ls <- list.files(path = here("data/ci_static/"), pattern = "*.parquet", full.names = T)
static_dyn.ls <- lapply(static_dyn.ls, read_parquet)

# filter variable and transform
static_dyn.ls2 <- mclapply(static_dyn.ls, function(x){
  x %>% 
    filter(variable == "I1" | variable == "Ig1" | variable == "I2" | variable == "Ig2" | variable == "R") %>% 
    mutate(id_alt = paste(id_1, id_2)) %>% 
    tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, id_alt)) %>%
  mutate(total1 = I1+Ig1, total2 = I2+Ig2)
})

static_dc.df <- do.call(rbind, static_dyn.ls2)
static_dc.df <- static_dc.df %>% 
  mutate(id_1 = gsub(" .*", "", id_alt),
         id_2 = gsub(".* ", "", id_alt)) %>% 
  filter(id_1 != id_2)
#write_parquet(static_dc.df, here("data/disease_curve/static_dc.parquet"))

further processing

static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
# get winners and losers
## import in fitness
static_fitness.df <- read.csv(here("data/ci_static.csv"))
## get winner situation
static_fitness.df2 <- static_fitness.df %>% 
  filter(id_1 != id_2) %>% 
  mutate(winning_id = case_when(
    fitness_difference > 0 ~ id_1,
    fitness_difference< 0 ~ id_2
  ),
  losing_id = case_when(
    fitness_difference < 0 ~ id_1,
    fitness_difference> 0 ~ id_2
  ))

# left join
static_dc.df2 <- static_dc.df %>% 
  left_join(select(static_fitness.df2, id_1, id_2, winning_id, losing_id, fitness_difference), by = c("id_1", "id_2"))

# get winner-loser difference in terms of I+Ig also filter out to onyl very strong fitness difference
static_dc.df3 <- static_dc.df2 %>% 
  mutate(total_diff = case_when(
    fitness_difference > 0 ~ total1-total2,
    fitness_difference< 0 ~ total2-total2
  ),
  total_winner = case_when(
    fitness_difference > 0 ~ total1,
    fitness_difference< 0 ~ total2
  ),
  total_loser = case_when(
    fitness_difference > 0 ~ total2,
    fitness_difference< 0 ~ total1
  )) %>% 
  filter(abs(fitness_difference) > 0.5)

plot

static_dc.pl <- ggplot() +
  geom_path(data = static_dc.df3, aes(x= total_winner, y = R, group = id_alt, color = "Winner"), alpha = 0.5, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  geom_path(data = static_dc.df3, aes(x= total_loser, y = R, group = id_alt, color = "Loser"),
          alpha = 0.5,arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Status", x = "Asexual & sexual iRBC", y = "RBC") +
  scale_color_manual(values=c("Winner" = "#4575b4","Loser"= "#fc8d59"))  +
  theme_bw() + 
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1))

#———co-infection invasion —————# # get invasion dynamic

# get invasion df
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))

# get cue range
ci_cue_range <- read.csv(here("data/cue_range_ci.csv"))
invasion_fitness.df2 <- invasion_fitness.df %>% 
  left_join(select(ci_cue_range, id, mut_cue = cue, mut_low = low, mut_high = high, mut_by = by), by = c("mut_id"= "id")) %>% 
  left_join(select(ci_cue_range, id, res_cue = cue, res_low = low, res_high = high, res_by = by), by = c("res_id"= "id"))

function to get dynamic

get_invasion_dyn <- function(df){
  # get cues
  mut_cue <- df$mut_cue
  res_cue <- df$res_cue
  
  # get info of cues (for co infection)
  if(stringr::str_detect(mut_cue, "-i")){mut_cue = gsub("*-i", "1", mut_cue)}
  if(stringr::str_detect(mut_cue, "-i", negate = T)){mut_cue = mut_cue}
  if(stringr::str_detect(res_cue, "-i")){res_cue = gsub("*-i", "2", res_cue)}
  if(stringr::str_detect(res_cue, "-i", negate = T)){res_cue = res_cue}
  
  # get log
  mut_log <- ifelse(stringr::str_detect(df$mut_id, "log"), "log10", "none")
  res_log <- ifelse(stringr::str_detect(df$res_id, "log"), "log10", "none")
  
  # get parameters
  mut_par <- c(df$mut_var1_opt, df$mut_var2_opt, df$mut_var3_opt, df$mut_var4_opt)
  res_par <- c(df$res_var1, df$res_var2, df$res_var3, df$res_var4)
  
  # get cue range
  mut_cue_range <- seq(df$mut_low, df$mut_high, by = df$mut_by)
  res_cue_range <- seq(df$res_low, df$res_high, by = df$res_by)
  
  # get dynamics of co infection
  ci_dyn <- chabaudi_ci_clean(
    parameters_cr_1 = mut_par,
    parameters_cr_2 = res_par,
                  immunity = "tsukushi",
                  parameters = parameters_tsukushi,
                  cue_1 = mut_cue,
                  cue_2 = res_cue,
                  cue_range_1 = mut_cue_range,
                 cue_range_2 = res_cue_range,
                log_cue_1 = mut_log,
                log_cue_2 = res_log,
                solver = "vode",
                time_range = seq(0, 30, 0.001),
    dyn = T)
  
  # append label to all df
 ci_dyn2 <- cbind(ci_dyn, mut_id = df$mut_id, res_id = df$res_id)
  
  # write
 write_parquet(ci_dyn2, paste0(here("data/ci_invasion_dyn/"), df$mut_id, "-", df$res_id, ".parquet"))
}

run dynamic funciton

# get function and parameters
source(here("functions/chabaudi_ci_clean.R"))
parameters_tsukushi <- c(R1 = 8.89*(10^6), # slightly higher
                lambda = 3.7*(10^5),
                mu = 0.025, 
                p = 8*(10^-6), # doubled form original
                alpha = 1, 
                alphag = 2, 
                beta = 5.721, 
                mum = 48, 
                mug = 4, 
                I0 = 43.85965, 
                Ig0 = 0, 
                a = 150, 
                b = 100, 
                sp = 1,
                psin = 16.69234,
                psiw = 0.8431785,
                phin = 0.03520591, 
                phiw = 550.842,
                iota = 2.18*(10^6),
                rho = 0.2627156)
# split
invasion.ls <- split(invasion_fitness.df2, seq(nrow(invasion_fitness.df2)))

# run function
mclapply(invasion.ls, get_invasion_dyn, mc.cores = 4)

process data

# import in invasion dynamics
invasion_dyn.ls <- list.files(path = here("data/ci_invasion_dyn"), pattern = "*.parquet", full.names = T)
invasion_dyn.ls <- lapply(invasion_dyn.ls, read_parquet)

# filter and so on
invasion_dyn.ls2 <- mclapply(invasion_dyn.ls[167:177], mc.cores = 4, function(x){
  x2 <- x %>% 
    filter(variable == "I1" | variable == "Ig1" | variable == "I2" | variable == "Ig2" | variable == "R") %>% 
    mutate(id_alt = paste(mut_id, res_id)) %>% 
    select(id_alt, time, variable, value) %>% 
    tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, id_alt)) %>%
  mutate(total1 = I1+Ig1, total2 = I2+Ig2)
  
  write_parquet(x2, paste0(here("data/disease_curve/ci_invasion/"), unique(x2$id_alt), "_dc.parquet"))
})

# fetch data
invasion_dyn.ls2 <- list.files(path = here("data/disease_curve/ci_invasion"), pattern = "*.parquet", full.names = T)
invasion_dyn.ls2 <- lapply(invasion_dyn.ls2, read_parquet)
invasion_dc.df <- do.call(rbind, invasion_dyn.ls2)
invasion_dc.df <- invasion_dc.df %>% 
  mutate(mut_id = gsub(" .*", "", id_alt),
         res_id = gsub(".* ", "", id_alt)) %>% 
  filter(mut_id != res_id)
#write_parquet(invasion_dc.df, here("data/disease_curve/invasion_dc.parquet"))

further processing

invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
# get winners and losers
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))
invasion_dc.df2 <- invasion_dc.df %>% 
  left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>% 
  mutate(
  total_winner = case_when(
    fitness> 0 ~ total1,
    fitness< 0 ~ total2
  ),
  total_loser = case_when(
    fitness > 0 ~ total2,
    fitness < 0 ~ total1
  )) %>% 
  filter(abs(fitness) > 0.5)

plot

invasion_dc.pl <- ggplot() +
  geom_path(data = invasion_dc.df2, aes(x= total_winner, y = R, group = id_alt, color = "Winner"), alpha = 0.5, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  geom_path(data = invasion_dc.df2, aes(x= total_loser, y = R, group = id_alt, color = "Loser"),
          alpha = 0.5,arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Status", x = "Asexual & sexual iRBC", y = "RBC") +
  scale_color_manual(values=c("Winner" = "#4575b4","Loser"= "#fc8d59"))  +
  theme_bw() + 
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) %>% 
  theme(legend.position = "none")

#——— quantifying disease curve area ————# # function to calculate area between sets of points -> from https://stackoverflow.com/questions/3672260/area-covered-by-a-point-cloud-with-r

library(splancs)
Loading required package: sp

Spatial Point Pattern Analysis Code in S-Plus
 
 Version 2 - Spatial and Space-Time analysis


Attaching package: ‘splancs’

The following object is masked from ‘package:dplyr’:

    tribble
cha<-function(df){
  x <- df$total
  y <- df$R
chull(x,y)->i
return(areapl(cbind(x[i],y[i])))
}
library(splancs)
cha<-function(df){
  x <- df$total
  y <- df$R
chull(x,y)->i
return(areapl(cbind(x[i],y[i])))
}

loop to get area: single infection

# join with fitness
si_fitness.df <- si_fitness.df %>% left_join(ez_label, by = c("id" = "id_si"))
Error in is.data.frame(y) : object 'ez_label' not found

coinfection

# split
ci_dc_high.ls <- split(ci_dc.high2, ci_dc.high2$ez_label)
ci_dc_poor.ls <- split(ci_dc.poor, ci_dc.poor$label)

# run function to find area
ci_dc_high.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc_high.ls, cha)), id_alt = names(lapply(ci_dc_high.ls, cha)), value = unique(ci_dc.high2$value))

ci_dc_poor.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc_poor.ls, cha)), id_alt = names(lapply(ci_dc_poor.ls, cha)), value = unique(ci_dc.poor$value))

# edit and join
ci_dc_high.area2 <-  ci_dc_high.area %>% 
  select(area, value) %>% 
  mutate(condition = "Co-infection")

ci_dc_poor.area2 <-  ci_dc_poor.area %>% 
  select(area, value) %>% 
  mutate(condition = "Co-infection")

dual cue

# split
dual.dc <- read_parquet(here("data/disease_curve/dual_dc.parquet"))
dual_dc.ls <- split(dual.dc, dual.dc$label_alt)

# get area
dual_dc.area <- cbind.data.frame(area = as.numeric(lapply(dual_dc.ls, cha)), id_alt = names(lapply(dual_dc.ls, cha)))

# bind with fitness
dual_fitness.df <- dual_fitness.df %>% mutate(id_alt = paste(label, "+", label_b))
dual_dc.area2 <- dual_dc.area %>% 
  left_join(dual_fitness.df, by = "id_alt") %>% 
  select(area, value) %>% 
  mutate(condition = "Dual-cue") %>% 
  filter(value > 2)
dual_dc.area2

#—— get fitted scatter plot for all single infection, co infection, and dual cue ——–#

# plot
library("ggpmisc")
Loading required package: ggpp

Attaching package: ‘ggpp’

The following object is masked from ‘package:ggplot2’:

    annotate

#——- plot together with disease curve ——–#

# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "h", widths = c(1, 0.35))
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.
# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "h", widths = c(1, 0.35))
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.
# co-infection
ci_vir.pl <- ggarrange(ci_dc.pl, ci_area.pl, align = "h", widths = c(1, 0.35))
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.

#——— static area comparison ————-# # compute area

# import in dc dynamic and fitness
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
static_fitness.df <- read.csv(here("data/ci_static.csv"))

# get winner and loser
static_dc.df4 <- static_dc.df %>% 
  left_join(select(static_fitness.df, id_1, id_2, fitness_difference), by = c("id_1", "id_2")) %>%
  filter(id_1 != id_2) %>% 
  mutate(
  total_winner = case_when(
    fitness_difference > 0 ~ total1,
    fitness_difference< 0 ~ total2
  ),
  total_loser = case_when(
    fitness_difference > 0 ~ total2,
    fitness_difference< 0 ~ total1
  ))%>% 
  filter(abs(fitness_difference) > 0.5)

# split by winner and loser
static_dc.ls1 <- split(select(static_dc.df4, R, total = total_winner), static_dc.df4$id_alt)
static_dc.ls2 <- split(select(static_dc.df4, R, total = total_loser), static_dc.df4$id_alt)

# get area
static_win.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls1, cha)), status = "Winner")
static_loser.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls2, cha)), status = "Loser")

# pair
static.area <- cbind(select(static_win.area, Winner = area),
                     select(static_loser.area, Loser = area)) %>% 
  mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))

plot static

#——— invasion area comparison —————–# # get area

# import in dc dynamic and fitness
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))

invasion_dc.df4 <- invasion_dc.df %>% 
  left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>% 
  mutate(
  total_winner = case_when(
    fitness> 0 ~ total1,
    fitness< 0 ~ total2
  ),
  total_loser = case_when(
    fitness > 0 ~ total2,
    fitness < 0 ~ total1
  )) %>% 
  filter(abs(fitness) > 0.5)

# split by winner and loser
invasion_dc.ls1 <- split(select(invasion_dc.df4, R, total = total_winner), invasion_dc.df4$id_alt)
invasion_dc.ls2 <- split(select(invasion_dc.df4, R, total = total_loser), invasion_dc.df4$id_alt)

# get area
invasion_win.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls1, cha)), status = "Winner")
invasion_loser.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls2, cha)), status = "Loser")

# pair
invasion.area <- cbind(select(invasion_win.area, Winner = area),
                     select(invasion_loser.area, Loser = area)) %>% 
  mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))

plot

#—— plot together ————-#

# pairwise comparison for static and invasive comeptition
heterocue_comp.pl <- ggarrange(static_area.pl, invasion_area.pl, ncol = 2, nrow = 1, align = "v")

# join inthe other disease curves
ggarrange(si_vir.pl, ci_vir.pl, dual_vir.pl, heterocue_comp.pl, ncol = 2, nrow = 2, labels = c("A", "B", "C", "D"), heights = c(1, 0.8))

ggsave(here("figures/plos-bio/virulence.tiff"), units = "px", width = 2250, height = 1600, scale = 1.9, dpi=300, bg = "white")

#====================================# # dual cue dynamics figure #===================================#

get dual dynamics

dual.dyn <- chabaudi_si_clean(
  parameters_cr = c(4.446192033,    10.97518275,    1.38762817, 23.3059254, -3.452052371,   -18.0070692,    39.66614226,    -3.545193141,   18.78350799),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 30, by = 1e-3),
  cue_range =  seq(6, 7, by = 1/500),
  cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
  cue = "R",
  cue_b = "I",
  log_cue = "log10",
  log_cue_b = "log10",
  solver = "vode",
  dyn = T
)

# filter out relevent dataframes
dual.dyn_f <- dual.dyn %>% 
  filter(variable %in% c("I", "Ig", "G", "R", "N", "W"))

# cr only
dual.dyn_cr <- dual.dyn %>% filter(variable == "cr")

plot

dual_I.plt <- ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "I"), aes(x = time, y = value/(10^5)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "I" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^5, name="Asexual iRBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_Ig.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "Ig"), aes(x = time, y = value/(10^5)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "Ig" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^5, name="Sexual iRBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_G.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "G"), aes(x = time, y = value/(10^4)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "G" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^4)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^4, name="Gametocyte per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_R.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "R"), aes(x = time, y = value/(10^7)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "R" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^7)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^7, name="RBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_N.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "N"), aes(x = time, y = value*10),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "N" & row_number() %% 1000 == 0), 
             aes(x = time, y = value*10), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*0.1, name="Indiscriminate immunity")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_W.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "W"), aes(x = time, y = value*2),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "W" & row_number() %% 1000 == 0), 
             aes(x = time, y = value*2), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*0.5, name="Targeted immunity")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

plot together

ggarrange(dual_I.plt, dual_Ig.plt, dual_G.plt, dual_R.plt, dual_N.plt, dual_W.plt, nrow = 3, ncol = 2, align = "hv", labels = c("A", "B", "C", "D", "E", "F"))
ggsave(here("figures/plos-bio/dual_dyn.tiff"), units = "px", width = 2250, height = 2000, scale = 1, dpi=300, bg = "white")

#======================================# # Single and co-infection verification #======================================# # single infection

# import in all single infection data
si_val.ls <- list.files(path = here("data/si_validation"), pattern = "*.csv", full.names = T)

si_val.df <- lapply(si_val.ls, read.csv)
si_val.df <- do.call(rbind, si_val.df)

# get max fitness from simulation. left join with si_opt
si_opt.df <- read.csv(here("data/si_opt.csv"))

# we can see that all of the randomly simulated models have a fitness value that is less than the optimized model
si_val.df2 <- select(si_val.df, V1, id) %>%
  left_join(si_opt.df, by =c("id" = "id")) %>% 
  mutate(fitness_difference = fitness_20 - V1) %>% 
  left_join(select(ez_label, id_si, ez_label_si), by = c("id" = "id_si"))

Model validation

plot

si_val.plt <- ggplot(data = si_val.df2, aes(x = fitness_difference)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0, color = "#fc8d59") +
  facet_wrap(~ez_label_si, scales = "free", ncol = 3) +
  labs(x = "Optimized fitness - random fitness", y = "Frequency") +
  theme_bw()

ci_val.plt <- ggplot(data = ci_val.df2, aes(x = V1)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0, color = "#fc8d59") +
  facet_wrap(~ez_label, scales = "free", ncol = 4) +
  labs(x = "Fitness difference between\noptimized and random strain", y = "Frequency") +
  theme_bw()

ggarrange(si_val.plt, ci_val.plt, align = "hv", labels = c("A", "B"), widths = c(3,4))
ggsave(here("figures/plos-bio/validation.tiff"), units = "px", width = 2250, height = 1300, scale = 1.6, dpi=300, bg = "white")

#=========================# # Monte carlo dynamics supplementary #=========================# # run code in report 16

mc_dyn_a <- ggplot() +
  geom_line(data = reference.df, aes(x = time, y = cr)) +
  geom_ribbon(data = diff.df, aes(x = time, ymin = cr_bot, ymax = cr_top), alpha = 0.5, fill = "#fc8d59") +
  facet_wrap(~cue, ncol = 2) +
  labs(x = "Time (days)", y = "Conversion rate") +
  theme_bw()
  
# plot fitness timeseries. When if tiness lost? At the latter part
mc_dyn_b  <- ggplot() +
  geom_line(data = reference.df, aes(x = time, y = tau)) +
  geom_ribbon(data = diff.df, aes(x = time, ymin = tau_bot, ymax = tau_top), alpha = 0.5, fill = "#fc8d59") +
  facet_wrap(~cue, ncol = 2) +
  labs(x = "Time (days)", y = "Transmission potential") +
  theme_bw()

ggarrange(mc_dyn_a, mc_dyn_b, ncol = 1, align = "v", labels = c("A", "B"))
Warning: Removed 1 row containing missing values (`geom_line()`).
ggsave(here("figures/plos-bio/MC_dyn.tiff"), units = "px", width = 2250, height = 2000, scale = 1.3, dpi=300, bg = "white")

---
title: "R Notebook"
output: html_notebook
---
```{r}
library(dplyr)
library(ggplot2)
library(forcats)
library(here)
library(deSolve)
library(crone)
library(optimParallel)
library(doParallel)
library(doRNG)
library(arrow)
library(stringr)
library(parallel)
library(ggpubr)
library(scales)
```

Notebook for plotting all of the figures for PloS Biology manuscript submission

Guidelines: taken from https://journals.plos.org/plosbiology/s/figures#loc-figure-file-requirements
1. format: eps
2. max file size: 10 MB
3. text size: Arial, Times, or Symbol font only in 8-12 point
2. figure size: Width: 789 – 2250 pixels (at 300 dpi). Height maximum: 2625 pixels (at 300 dpi).


#=========================================#
figure 1: best single and co-infection cue
#=========================================#
Figure displaying the reaction norms of best single and co-infection.

#------- optimal cue reaction norm -----------#
# read data
```{r}
# single infection dynamics, reaction norms, and rugs
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 
si_rn.df <- read_parquet(here("data/si_dyn/si_rn.parquet"))
si_rug.df <- read_parquet(here("data/si_dyn/si_rug.parquet")) %>% 
  distinct(value, label, .keep_all = T)

# co-infection dynamics, reaction norms, and rugs
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
ci_rn.df <- read_parquet(here("data/ci_dyn/ci_rn.parquet"))
ci_rug.df <- read_parquet(here("data/ci_dyn/ci_rug.parquet")) %>% 
  distinct(value, label, .keep_all = T)

ci_rug.df
```

# process data for reaction norm
```{r}
# import labelling scheme
ez_label <- read.csv(here("data/ez_label.csv"))

# get si_label with ci cue range
si_ci_rug.df <- ci_rug.df %>% 
  mutate(label_si = case_when(
    label %in% c("I", "I1+I2") ~ "I",
    label %in% c("I log","I1+I2 log") ~ "I log",
    label %in% c("Ig", "Ig1+Ig2") ~ "Ig",
    label %in% c("Ig log") ~ "Ig log",
    label %in% c("sum", "I+Ig") ~ "I+Ig",
    label %in% c("sum log", "I+Ig log") ~ "I+Ig log",
    label == "R" ~ "R",
    label == "R log" ~ "R log",
    label %in% c("G", "G1+G2") ~ "G",
    label == "G log" ~ "G log"
  )) 

# get limit for si_rug
si_rug_lim.df <- si_rug.df %>% 
  filter(time <= 20) %>%
  group_by(label)%>% 
  summarise(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  select(label_si = label, min_si = min, max_si = max)

# filter to restriction conversion rate reaction norm range to cue ranges that appear in rug
## change to Inf/-inf to NA. Note that I am first joining with si rug lim to check which limit is larger, We will go with the cue range that has the largest span
ci_rug_lim.df <- si_ci_rug.df %>% 
  group_by(label) %>% 
  mutate(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  distinct(label, .keep_all = T) %>% 
  select(label, label_si, min, max)

rug_lim.final <- ci_rug_lim.df %>% left_join(si_rug_lim.df, by = "label_si") %>% 
  mutate(final_min = min(min, min_si),
         final_max = max(max, max_si))

# get second rug_lim.final for single infection
rug_lim.final2 <- rug_lim.final %>% 
  group_by(label_si) %>% 
  mutate(final_min = min(final_min, na.rm = T),
         final_max = max(final_max, na.rm = T))

# filter ci_rn by limit
ci_rn.df2 <- ci_rn.df %>% 
  left_join(rug_lim.final, by  = "label") %>% 
  group_by(label) %>% 
  filter(cue_range <= final_max & cue_range >= final_min) %>% 
  arrange(cue_range, .by_group = T) %>% 
  filter(row_number() %% 10 == 0) 
```

# match single infection rn with coinfection 
```{r}
# get ci label to si rug and filter by limit
si_rn.df2 <- left_join(si_rn.df, select(ez_label, label_si, label_ci), by = c("label" = "label_si")) %>% 
  left_join(rug_lim.final, by  = c("label_ci" = "label")) %>% 
  group_by(label_ci) %>% 
  filter(cue_range <= final_max & cue_range >= final_min) %>% 
  arrange(cue_range, .by_group = T) %>% 
  filter(row_number() %% 10 == 0) %>% 
  select(cue_range, cr, label_ci, label_si)

# get ci label to si rug, we will keep one unique value per label
si_rug.df2 <- select(si_rug.df, value, label_si = label) %>% distinct(value, label_si)
```

# plot reaction norm
```{r}
# join with ezlabel
ci_rn.df3 <- ci_rn.df2 %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rn.df3 <- si_rn.df2 %>% left_join(ez_label, by = "label_ci")
ci_rug.df3 <- ci_rug.df %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rug.df3 <- si_rug.df2 %>% left_join(ez_label, by = "label_si")
```


```{r}
# redo order of cues
ci_rn.df3$ez_label <- factor(ci_rn.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10")) 

si_rn.df3$ez_label <- factor(si_rn.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))

ci_rug.df3$ez_label <- factor(ci_rug.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))

si_rug.df3$ez_label <- factor(si_rug.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))
# plot
ggplot() +
  geom_line(data = ci_rn.df3, aes(x = cue_range, y = cr, color = "Co-infection")) +
  geom_point(data = ci_rn.df3 %>% 
    group_by(label) %>% 
    mutate(ten_th = round(n()/10)) %>% 
    filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Co-infection", shape = "Co-infection"), size = 2) +
  geom_line(data = si_rn.df3, aes(x = cue_range, y = cr, color = "Single infection")) +
  geom_point(data = si_rn.df3 %>% 
    group_by(label_ci) %>% 
    mutate(ten_th = round(n()/10)) %>% 
    filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Single infection", shape = "Single infection"), size = 2) +
  geom_rug(data = ci_rug.df3, aes(x = value), color = "#4575b4", sides = "t", length = unit(0.1, "npc")) +
  geom_rug(data = si_rug.df3, aes(x = value), color = "#fc8d59", sides = "b", length = unit(0.1, "npc")) +
  facet_wrap(~ez_label, scales = "free_x", ncol = 2) +
  ylim(-0.3, 1.3) +
  theme_bw() +
  labs(y = "Conversion rate", x = "Cue range", color = "Model", shape = "Model") +
  scale_x_continuous(labels = function(x) format(x, scientific = T),
                     guide = guide_axis(check.overlap = TRUE)) + 
                    theme(axis.text.x = element_text(size = 7),
                          legend.position = "right")  +
  scale_color_manual(values=c( "#4575b4", "#fc8d59")) +
  theme(strip.text.x = element_text(margin = margin(b = 0.5, t = 0.5)))

ggsave(units = "px", dpi = 300, width = 2000, height = 2500, filename = here("figures/plos-bio/reaction_norm.tiff"), bg = "white", scale = 1.1)
```

#=========================================#
Plotting single and co-infection fitness scatter plot
#=========================================#
# import in data
```{r}
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 

# co-infection dynamics
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

ez_label <- read.csv(here("data/ez_label.csv"))
```

# process for final 20 days fitness
```{r}
# get single infection maximum tau_cum for 20 days
si_fitness.df <- si_dyn.df %>% 
  filter(variable == "tau_cum" & time == 20)

# get co-infection maximum tau_cum for 20 days
ci_fitness.df <- ci_dyn.df %>% 
  filter(variable == "tau_cum1" & time == 20)

# join together two dataframes and add labels
si_ci_fitness.df <- select(ci_fitness.df, fitness_ci = value, label_ci = label) %>% 
  left_join(ez_label, by = "label_ci") %>% 
  left_join(select(si_fitness.df, fitness_si = value, id_si = id), by = "id_si") %>% 
  select(ez_label_si, ez_label, fitness_si, fitness_ci) %>% 
  rbind(data.frame( # add time
    ez_label_si = "Time", 
    ez_label = "Time",
    fitness_si =  9.787899,
    fitness_ci  = 2.311841
  ))

si_ci_fitness.df
```

# plot scatter point of single infection vs co-infection
```{r}
si_ci_fitness.pl <- ggplot() +
  geom_point(data = si_ci_fitness.df, aes(x = fitness_si, y = fitness_ci, color = ez_label_si, shape = ez_label_si), size = 3.5) +
  ggrepel::geom_label_repel(data = si_ci_fitness.df, aes(label = ez_label, x = fitness_si, y = fitness_ci),
                            fill = "white",xlim = c(-Inf, Inf), ylim = c(NA, NA)) +
  labs(x = "Maximum single infection fitness", y = "Maximum Co-infection fitness (per strain)",
       color = "Single infection cue", shape = "Single infection cue") +
  scale_shape_manual(values = 15:25) +
  lims(x = c(8, 10.3)) +
  theme_bw()
```
#=========================================================#
# time series conversion rate for single and co-infection
#=========================================================#
#---------get "ideal" single infection and co-infection dynamics---------#
This is when stuff are optimized based on time
```{r}
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/chabaudi_ci_clean.R"))

# single infection dynamic with time as cue. Optimized using local optimizer. Note that the time variable in dual cue is slightly different with higher flexibility. While that increases the fitness value by ~0.1, the overall conversion rate dynamic does not change that much
si_t.df <- chabaudi_si_clean(
  parameters_cr = c(4.55386, -13.0056, 4.15466, -11.9424),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, 20, by = 1e-3),
  cue = "t",
  solver = "vode",
  dyn = T)

# co-infection dynamic with time as cue
ci_t.df <- chabaudi_ci_clean(parameters_cr_1 = c(26.16425,	-71.07799,	53.34121,	-166.25693),
                            parameters_cr_2 = c(26.16425,	-71.07799,	53.34121,	-166.25693),
                            immunity = "tsukushi",
                            parameters = parameters_tsukushi,
                            time_range = time_range, 
                            cue_1 =  "t",
                            cue_2= "t",
                            cue_range_1 = time_range, # cue range of strain 1
                            cue_range_2 = time_range, # cue range of strain 2
                            log_cue_1 = "none", # whether to log transform cue 1
                            log_cue_2 = "none", # whether to log transform cue 2
                            solver = "vode", # solver for numerical integration. Vode often gives faster runs
                              dyn = T)

# get only conversion rate and make same format as si_cr.df2/ci_cr.df2
si_t.df %>% filter(time == 20 & variable == "tau_cum") ## fitness value of time as cue in single infection
si_t.cr <- si_t.df %>% 
  filter(variable == "cr") %>% 
  mutate(ez_label_si = "Time", 
         fitness_si =9.787899) %>% 
  select(time, value, fitness_si, ez_label_si)


## get fitness value of co-infection time
ci_t.df %>% filter(variable == "tau_cum1") %>% summarize(max = max(value, na.rm = T))
ci_t.cr <- ci_t.df %>% 
  filter(variable == "cr_1") %>% 
  mutate(ez_label = "Time", 
         fitness_ci = 2.311841,
         label = "Time") %>%
  select(time, value, fitness_ci, ez_label, label)
```

#--------- single infection conversion rate heat map--------------#
# process info for single infection
```{r}
# get conversion rate dynamics
si_cr.df <- si_dyn.df %>% 
  filter(time <= 20 & variable == "cr")

# get time ranges. We will omit any values where I is below 100. Note for all single infection, it is form  0.55 to 20. Note we are not doing I+Ig because conversion rate is only relevent when asexual iRBC is around to differentiate!
si_dyn.df %>% 
  filter(time <= 20 & variable == "I" & value > 100) %>%
  group_by(id) %>% 
  summarize(min_time = min(time),
            max_time = max(time))

# join fitness and ideal dynamics
si_cr.df2 <- si_cr.df %>% 
  left_join(select(si_fitness.df, id, fitness_si = value), by = "id") %>% 
  left_join(ez_label, by = c("id" = "id_si")) %>% 
  select(time, value, fitness_si, ez_label_si) %>% 
  rbind(si_t.cr) # join with dataframe containing ideal dynamics

# plot. note that we are omitting day 1 conversion rate dynamic because all cr at that stage is 0 (by model default)
si_cr.pl <- ggplot(data = si_cr.df2, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si))) +
  geom_raster(aes(fill = value)) +
  labs(x = "Time (days)", y = "Single infection cues", fill = "Conversion rate") +
  viridis::scale_fill_viridis(limits = c(0, 1), oob = scales::squish) +
  xlim(1, 20) +
  theme_bw()
```

# -----------------co-infection conversion rate heatmap-----------#
# plot co-infeciton convesion rate heatmap
```{r}
# get conversion rate dynamics
ci_cr.df <- ci_dyn.df %>% 
  filter(time <= 20 & variable == "cr_1")

# get time range to include. Same criteria. we need if single strain population declines to less than 100
## when time is used as a cue: 12.943
ci_t.df %>%
  filter(time <= 20 & variable == "I1" & value > 100) %>% 
  summarize(max_time = max(time))
## all other dynamics
ci_cr.time <- ci_dyn.df %>% 
  filter(time <= 20 & variable == "I1" & value > 100) %>% 
  group_by(label) %>% 
  summarize(max_time = max(time)) %>% 
  rbind(data.frame(label = "Time", max_time = 12.943))

# join with fitness, ideal dynamics and also maximum time range considered
ci_cr.df2 <- ci_cr.df %>% 
  left_join(select(ci_fitness.df, label, fitness_ci = value), by = "label") %>% 
  left_join(ez_label, by = c("label" = "label_ci")) %>% 
  select(time, value, fitness_ci, ez_label, label) %>% 
  rbind(ci_t.cr) %>% # bind ideal dynamics
  left_join(ci_cr.time, by = "label") %>% 
  filter(time <= max_time) # keep only those that are within time range

# plot
ci_cr.pl <- ggplot() +
  geom_raster(data = ci_cr.df2, 
              aes(x = time, y = forcats::fct_reorder(ez_label, fitness_ci), fill = value)) +
  labs(x = "Time (days)", y = "Co-infection cues", fill = "Conversion rate") +
  viridis::scale_fill_viridis(limits = c(0, 1), oob = scales::squish) +
  xlim(1, 20) +
  theme_bw()
```

#--------- assemble final figure --------------#
```{r}
# combine conversion rate dynamic of single infection and co-infection
cr.pl <- ggarrange(si_cr.pl, ci_cr.pl, labels = c("B", "C"), ncol = 2, common.legend = T, align = "h")

# combine with fitness scatterplot
ggarrange(si_ci_fitness.pl, cr.pl, ncol = 1, labels = c("A", ""), align = "hv")

# save
ggsave(units = "px", dpi = 300, width = 2000, height = 2000, filename = here("figures/plos-bio/fitness_cr-dyn.tiff"), bg = "white", scale = 1.35)
```

#===========================================================#
# Demographic stochasticity
#===========================================================#
#---------- plot heat map---------------#
# import in all fitness files
```{r}
file_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv", full.names = T)
name_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv")
name_ls <- gsub("*.csv", "", name_ls)

# 60, which is about right
length(file_ls)

# read in files
fitness.ls <- lapply(file_ls, read.csv)

# assign unique ID
fitness.ls <- mapply(cbind, fitness.ls, "ID" = name_ls, SIMPLIFY = F)
```

# process data
```{r}
# get metainfo from ID
fitness.ls2 <- mclapply(fitness.ls, function(x){
  id_col <- x$ID
  # string split to extract all info
  cue <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
  log <- unlist(str_split(unique(id_col), pattern = "_"))[[4]]
  rand_var <- unlist(str_split(unique(id_col), pattern = "_"))[[5]]
  
  # get mean
  mean_fitness <- mean(x$max_fitness)
  # get sd
  sd_fitness <- sd(x$max_fitness)
  
  # bind results
  res <- cbind(x, cue= cue, log = log, rand_var = rand_var, mean_fitness = mean_fitness, sd_fitness = sd_fitness)
  return(res)
})
```

# Get reference data
```{r}
reference_ls <- list.files(path = here("data/MC2"), pattern = "*.csv", full.names = T)
reference_name.ls <- gsub("*.csv", "", list.files(path = here("data/MC2/"), pattern = "*.csv"))

# read in the files
reference.ls <- lapply(reference_ls, read.csv)

# assign unique ID
reference.ls <- mapply(cbind, reference.ls, "ID" = reference_name.ls, SIMPLIFY = F)

# get meta data
reference.ls2 <- mclapply(reference.ls, function(x){
  id_col <- x$ID
  # string split to extract all info
  cue <- unlist(str_split(unique(id_col), pattern = "_"))[[2]]
  # get log
  third_col <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
  log <- ifelse(third_col == "log", "log10", "none")
  
  # get mean
  mean_fitness <- mean(x$max_fitness)
  
  # get sd
  sd_fitness <- sd(x$max_fitness)
  
  # bind results
  res <- cbind(x, cue= cue, log = log, rand_var = "all", ref_mean_fitness = mean_fitness, ref_sd_fitness = sd_fitness)
  return(res)
})
```

# combine MC partitioned and reference df
```{r}
# get unique column values for each cue, log, and rand_var combo
fitness.ls3 <- do.call(rbind, fitness.ls2)
fitness.ls3 <- fitness.ls3 %>% dplyr::distinct(ID, .keep_all = T)

# repeat with reference
reference.ls3 <- do.call(rbind, reference.ls2)
reference.ls3 <- reference.ls3 %>% dplyr::distinct(ID, .keep_all = T)

# combine!
ref_fit.df <- left_join(fitness.ls3, reference.ls3, by = c("cue" = "cue", "log"= "log"))
```

# compute proportion fitness and variation
```{r}
ref_fit.df2 <- ref_fit.df %>% 
  mutate(p_sd = sd_fitness/ref_sd_fitness,
         p_mean = ref_mean_fitness/mean_fitness,
         cue_log = paste0(cue, "_", log),
         label = case_when(
           cue == "G" ~ "Gametocyte",
           cue == "I" ~ "Asexual iRBC",
           cue == "I+Ig" ~ "Asexual&sexual\niRBC",
           cue == "Ig" ~ "Sexual iRBC",
           cue == "R" ~ "RBC"
           ),
         parameter = case_when(
           rand_var.x == "rho" ~ "RBC replenishment (ρ)",
           rand_var.x == "phin" ~ "Half-life indis (ϕn)",
           rand_var.x == "phiw"~ "Half-life targeted (ϕw)",
           rand_var.x == "psin" ~ "Activation indis (ψn)",
           rand_var.x == "psiw" ~ "Activation targeted (ψw)",
           rand_var.x == "beta" ~ "Burst size (β)"
         ))
```

# plot!
```{r}
# variation
mc_b <- ggplot() +
  geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_sd)) +
  facet_wrap(~log) +
  theme_bw() +
  viridis::scale_fill_viridis() +
  labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(sd("1 parameter randomized"), sd("all parameters randomized")))) +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 45, hjust=1))

# mean fitness
mc_c <- ggplot() +
  geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_mean)) +
  facet_wrap(~log) +
  theme_bw() +
  viridis::scale_fill_viridis() +
  labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(Mean("all parameters randomized"), Mean("1 parameter randomized")))) +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 45, hjust=1))

mc_partition <- ggarrange(mc_b, mc_c, ncol = 1)

```

#-------------- get violine plot of variation in fitness --------------------#
# read MC data
```{r}
# read in dymamics
mc_G_log.dyn <- read_parquet(here("data/MC2/mc_G_log_dyn.parquet"))
mc_G.dyn <- read_parquet(here("data/MC2/mc_G_dyn.parquet"))
mc_R_log.dyn <- read_parquet(here("data/MC2/mc_R_log_dyn.parquet"))
mc_R.dyn <- read_parquet(here("data/MC2/mc_R_dyn.parquet"))
mc_I_log.dyn <- read_parquet(here("data/MC2/mc_I_log_dyn.parquet"))
mc_I.dyn <- read_parquet(here("data/MC2/mc_I_dyn.parquet"))
mc_Ig_log.dyn <- read_parquet(here("data/MC2/mc_Ig_log_dyn.parquet"))
mc_Ig.dyn <- read_parquet(here("data/MC2/mc_Ig_dyn.parquet"))
mc_I_Ig_log.dyn <- read_parquet(here("data/MC2/mc_I+Ig_log_dyn.parquet"))
mc_I_Ig.dyn <- read_parquet(here("data/MC2/mc_I+Ig_dyn.parquet"))

# read in fitness
mc_G_log.fitness <- read.csv(here("data/MC2/mc_G_log_fitness.csv"))
mc_G.fitness <- read.csv(here("data/MC2/mc_G_fitness.csv"))
mc_R_log.fitness <- read.csv(here("data/MC2/mc_R_log_fitness.csv"))
mc_R.fitness <- read.csv(here("data/MC2/mc_R_fitness.csv"))
mc_I_log.fitness <- read.csv(here("data/MC2/mc_I_log_fitness.csv"))
mc_I.fitness <- read.csv(here("data/MC2/mc_I_fitness.csv"))
mc_Ig_log.fitness <- read.csv(here("data/MC2/mc_Ig_log_fitness.csv"))
mc_Ig.fitness <- read.csv(here("data/MC2/mc_Ig_fitness.csv"))
mc_I_Ig_log.fitness <- read.csv(here("data/MC2/mc_I+Ig_log_fitness.csv"))
mc_I_Ig.fitness <- read.csv(here("data/MC2/mc_I+Ig_fitness.csv"))
```

# examine variation
```{r}
# plot fitness vs iteration
fitness.df <- rbind(
  cbind(mc_G_log.fitness, id = "Gametocyte log10"),
  cbind(mc_G.fitness, id = "Gametocyte"),
  cbind(mc_R_log.fitness, id = "RBC log10"),
  cbind(mc_R.fitness, id = "RBC"),
  cbind(mc_I_log.fitness, id = "Asexual iRBC log10"),
  cbind(mc_I.fitness, id = "Asexual iRBC"),
  cbind(mc_Ig_log.fitness, id = "Sexual iRBC log10"),
  cbind(mc_Ig.fitness, id = "Sexual iRBC"),
  cbind(mc_I_Ig_log.fitness, id = "Asexual & sexual iRBC log10"),
  cbind(mc_I_Ig.fitness, id = "Asexual & sexual iRBC")
)

# quantify variance and mean
fitness_var.df <- fitness.df %>% 
  dplyr::group_by(id) %>% 
  dplyr::summarise(median = median(max_fitness),
                   mean = mean(max_fitness)) %>% 
  dplyr::mutate(id = forcats::fct_reorder(id, median))
```

# plot violin with difference in deterministic model fitness and mean model fitness
```{r}
# get deterministic df
det.df <- data.frame(fitness_var.df, `Maximum fitness` =  c(8.49777, 9.494991,8.854682,9.573291,8.58856,9.561373,8.23991,8.181604,8.569285,9.618812)) %>% 
  dplyr::mutate(id = forcats::fct_reorder(id, median)) %>% 
  tidyr::pivot_longer(-id) %>% 
  mutate(classification = case_when(
    name == "Maximum.fitness" ~"Optimal single infection",
    name == "median" ~"Median Monte Carlo",
    name == "mean" ~ "Mean Monte Carlo"))

mc_a <- ggplot() +
  geom_point(data = det.df, aes(y = id, x = value, shape = classification, color = classification), size = 3, alpha = 0.8) +
  geom_violin(data = fitness.df, aes(y = id, x = max_fitness), fill = "transparent") +
  labs(x = "Fitness", y = "Cue", color = "Fitness", shape = "Fitness") +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_color_manual(values=c("black", "#4575b4", "#fc8d59"))
```

#-------------- plot together-----------------------#
```{r}
# arrange the heat map
ggarrange(mc_a, mc_partition, ncol = 2, nrow = 1, labels = c("A", "B"), align = "h")

# save
ggsave(units = "px", dpi = 300, width = 2100, height = 1400, filename = here("figures/plos-bio/MC.tiff"), bg = "white", scale = 1.5)
```


#--------------------------------------------#
# dual cue optimization figure
#--------------------------------------------#
```{r}
source(here("functions/chabaudi_si_clean_high.R"))
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/par_to_hm_te.R"))
```

#---------- plotting fitness of dual vs single cue opt ---------#
# import in previous data
```{r}
# dual cue fitness
dual_fitness.df <- read.csv(here("data/dual_cue_opt4/dual_cue_fitness_20.csv"))
## make label and filter out very low fitness
dual_fitness.df <- dual_fitness.df %>% 
  mutate(temp_label = gsub("log", "log10", label),
         temp_label_b = gsub("log", "log10", label_b),
         label_final = paste0(temp_label, "+", temp_label_b)) %>% 
  filter(value > 2)

# get single cue fitness
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 
si_fitness.df <- si_dyn.df %>% 
  filter(variable == "tau_cum" & time == 20)

# join si and dual cue
dual_si_fitness.df <- dual_fitness.df %>% 
  left_join(select(si_fitness.df, id, si_fitness = value), by = "id") %>% 
  left_join(select(si_fitness.df, id_b = id, si_fitness_b = value), by = "id_b") %>% 
  mutate(si_fitness_max = ifelse(si_fitness > si_fitness_b, si_fitness, si_fitness_b),
         dual_label = gsub("log", "log10", paste(label, "+", label_b)))
```

# plot
```{r}
dual_si_fitness.pl <- ggplot() +
  geom_point(data = dual_si_fitness.df, aes(y = forcats::fct_reorder(dual_label, value), x = value, color = "Dual cue", shape = "Dual cue"), size = 3, alpha = 0.7) +
  geom_point(data = dual_si_fitness.df, aes(y = forcats::fct_reorder(dual_label, value), x = si_fitness_max, color= "Best single cue", shape= "Best single cue"), size = 3, alpha = 0.7) +
  labs(x = "Fitness", y = "Dual cue", color = "Cue", shape = "Cue") +
  scale_color_manual(values=c("#fc8d59", "#4575b4")) +
  geom_vline(xintercept = 9.883602) +
  geom_text(aes(x=9.883602, label="\nIdeal fitness (df=9)", y = "G + R log10"), angle=90) +
  theme_bw()
```


#----------- time series conversion rate -------------#
# dynamics simulation of high parameter cues (these serve as reference points)
```{r}
# best dual cue combo
dual.cr <- chabaudi_si_clean(
  parameters_cr = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(6, 7, by = 1/500),
  cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
  cue = "R",
  cue_b = "I",
  log_cue = "log10",
  log_cue_b = "log10",
  solver = "vode",
  dyn = T
)

# when time is used as a cue (high parameter)
time.cr <- chabaudi_si_clean_high(
  parameters_cr = c(9.154314,  -7.570829, -22.506638 ,  3.382405 ,-13.453519 ,-17.011485  , 3.678181, -12.851895 ,-26.115158),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, 20, by = 1e-3),
  cue = "t",
  solver = "vode",
  dyn = T)

# when asexual iRBC is used as a cue (high flexibility)
I_high.cr <- chabaudi_si_clean_high(
  parameters_cr = c(1.296675,  3.544034 , 4.907484,  2.174249, -3.238309 ,-5.181614 ,-1.645072 , 1.834302 , 1.581011),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000),
  cue = "I",
  log_cue = "log10",
  solver = "vode",
  dyn = T)

# when asexual iRBC is used as a cue (normal flexibility)
I.cr <- chabaudi_si_clean(
  parameters_cr = c(5.463558,	2.383948,	-17.757281,	4.571835),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000),
  cue = "I",
  log_cue = "log10",
  solver = "vode",
  dyn = T)

# process 
I_high.cr2 <- I_high.cr %>% filter(variable == "cr") %>% mutate(label_new = "I log10 (df=9)") %>% select(-variable)

I.cr2 <- I.cr %>% filter(variable == "cr") %>% mutate(label_new = "I log10 (df=3)") %>% select(-variable)

time_high.cr2 <- time.cr %>% filter(variable == "cr") %>% mutate(label_new = "Ideal (df=9)") %>% select(-variable)

dual.cr2 <- dual.cr %>% filter(variable == "cr") %>% mutate(label_new = "R log10 + I log10 (df=9)") %>% select(-variable)

# combine
dual_cr.df <- rbind(I_high.cr2, I.cr2, time_high.cr2, dual.cr2)
```

# plot
```{r}
dual_cr.pl <- ggplot() +
  geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
  geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 3) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
  xlim(0, 20) +
  scale_color_manual(values = c("#fc8d59","#fdcb44","black", "#4575b4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))
```

#------------ reaction norm heatmap of R log10 + I log10 ------------#
# process data
```{r}
# make heatmap df
R_I.hm <- par_to_hm_te(par = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
             cue_range = seq(6,	7, length.out = 500),
             cue_range_b = seq(0,	6.77815125, length.out = 500))

# process dynamics
R_I.dyn <- dual.cr %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(log_R = log10(R),
         log_I = log10(I))

# examine sexual iRBC as well
R_Ig.dyn <- dual.cr %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(log_R = log10(R),
         log_Ig = log10(Ig))
max(R_Ig.dyn$Ig) 
```

# plot
```{r}
dual_rn.pl <- ggplot() +
  geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
  scale_fill_viridis_c() +
  geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
  xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
  ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
  labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion rate") +
  theme_dark()

# just testing for sexual iRBC vs RBC
ggplot() +
  geom_path(data = R_Ig.dyn, aes(x = Ig, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_Ig.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = Ig, y = log_R), color = "white") +
  scale_x_continuous(trans = "log10") +
  xlim(0, 200000) +
  labs(y = "RBC log10", x = "Sexual iRBC log10", fill = "Conversion rate") +
  theme_dark()
```

# save figure for poster
```{r}
dual_rn.pl2 <- ggplot() +
  geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
  scale_fill_viridis_c() +
  geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
  xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
  ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
  labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion rate") +
  theme_dark() + theme(legend.position="top") 

dual_cr.pl2 <- ggplot() +
  geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
  geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 2) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
  xlim(0, 20) +
  scale_color_manual(values = c("#4575b4", "#91bfdb","#fc8d59","#fdcb44")) +
  theme_bw() +
  theme(legend.position="top") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))

ggarrange(dual_cr.pl2, dual_rn.pl2, align = "h", widths = c(1.25, 1))
ggsave(here("poster/dual_cue.png"), width = 7, height = 4)
```


#-------- assemble final figure -------------#
```{r}
# assemble panel B and C
dual_pl.BC <- ggarrange(dual_cr.pl, dual_rn.pl, align = "v", ncol = 1, labels = c("B", "C"))

# assemble panel A
ggarrange(dual_si_fitness.pl, dual_pl.BC, ncol = 2, labels = c("A", ""), widths = c(1, 0.75))
ggsave(here("figures/plos-bio/dual_cue.tiff"), units = "px", width = 2250, height = 1400, scale = 1.5, dpi=300,  bg = "white")
```


#============================================#
# dynamics of dual cue simulation (all combinations)
#============================================#
# conversion rate
```{r}
# import in dual cue dynamics
dual_dyn.df <- read_parquet(here("data/dual_cue_dyn/dual_cue_dyn.parquet"))

# keep only cr and make labels for plotting. we are going to omit any cue combinations containing I+Ig due to those not optimizing at all
dual_cr.df_p <- dual_dyn.df %>% 
  filter(variable == "cr" & cue != "I+Ig" & cue_b != "I+Ig") %>% 
  mutate(label_plot = paste(label, "+", label_b)) %>% 
  group_by(label_plot) %>% 
  arrange(time, .by_group = T) %>% 
  filter(row_number() %% 10 == 1) %>%  # cut down on number of rows
  left_join(select(dual_fitness.df, id, id_b, fitness = value), by = c("id", "id_b")) # get fitness

# plot raster
ggplot(data = dual_cr.df_p, aes(x = time, y = fct_reorder(label_plot, fitness), fill = value)) +
  geom_raster() +
  scale_fill_viridis_c() +
  xlim(1, 20) +
  labs(x = "Time (days)", y = "Dual cue combination", fill = "Conversion rate") +
  theme_bw()

ggsave(here("figures/plos-bio/dual_cue_cr.tiff"), units = "px", width = 2000, height = 1500, dpi=300,  bg = "white")
```

# cumultative transmission transmission potential
Proving a point to myself
```{r}
dual_tau.df_p <- dual_dyn.df %>% 
  filter(variable == "tau_cum" & cue != "I+Ig" & cue_b != "I+Ig") %>% 
  mutate(label_plot = paste(label, "+", label_b)) %>% 
  group_by(label_plot) %>% 
  arrange(time, .by_group = T) %>% 
  filter(row_number() %% 10 == 1) %>%  # cut down on number of rows
  left_join(select(dual_fitness.df, id, id_b, fitness = value), by = c("id", "id_b")) %>%  # get fitness 
  filter(fitness > 9.65)

# it seems that R log+I log only inches ahead of other at the very end of the infection, suggesting that terminal investment is at play
ggplot(data = dual_tau.df_p, aes(x = time, y = value, color = label_plot)) +
  geom_line() +
  scale_fill_viridis_c() +
  xlim(15, 20) +
  ylim(5,10) +
  labs(x = "Time (days)", y = "Dual cue combination", fill = "Conversion rate") +
  theme_bw()
```




#============================================#
# get dual cue disease map (simulated)
#============================================#
I am going to take the optimal dynamic (R log10 + I log10) and plot all possible disease curves and see if any follow the hysteresis curve. See below for real life disease curve.

# process 
```{r}
# get list of interested variables for plotting. drop cr, fitness, and death probability, immunity, and merozoites (which is not optimized in dual cue)
dual_var.ls <- unique(dual.cr$variable)[!unique(dual.cr$variable) %in% c("tau", "tau_cum", "cr", "cr_t", "ID", "N", "W", "M", "Mg")]

# create all possible variable combinations. 6 variable combination (4*4 with 4 same redundant and half of that) is expected
dual_var.comb <- tidyr::expand_grid(x = dual_var.ls,
                      y = dual_var.ls) %>% 
  filter(x != y) %>% 
  mutate(tmp = paste0(pmin(x, y), pmax(x, y))) %>% # eliminate same variable but different order
  slice_head(n = 1, by = tmp) %>% 
  select(-tmp)

dual_var.comb

# filter out intermediate data points in dynamic so we could have faster plotting. Also make wider for plotting
## filtering out all (none cr variable) value below 1 so negative vlues do not overwhelm disease curve. For cr, keep only value below or equal to 1
dual_cr.f <- dual.cr %>% 
  filter(variable %in% c(dual_var.ls, "cr") & case_when(variable != "cr" ~ value > 1, T~ value <=1)) %>% 
  tidyr::pivot_wider(names_from = variable, id_cols = c(time)) %>% 
  arrange(time) %>% 
  filter(row_number() %% 100 == 1) 

```

# plot
```{r}
# make path plot
## no logged on x and y
dual_curve.pl <- purrr::map2(.x = dual_var.comb$x, 
     .y = dual_var.comb$y,
      ~{ggplot() +
          geom_path(data = dual_cr.f, aes_string(x = .y, y = .x, color = "cr"),
                    arrow = arrow(length = unit(c(rep(0, nrow(dual_cr.f) - 2), 0.25), "inches")),
                    size = 1) +
          geom_point(data = dual_cr.f %>% filter(row_number() %% 10 ==1), 
                     aes_string(x = .y, y = .x, color = "cr"), size = 3) +
          labs(color = "Conversion rate") + # note the labels are reversed because we set x= y and y = x above
          scale_color_viridis_c(limits = c(0, 1)) +
          scale_x_continuous(labels = scientific) +
          scale_y_continuous(labels = scientific) +
          theme_bw() +
          theme(plot.margin = unit(rep(0.3,4), "lines"),
                legend.position = "none")
        }
          )

## logged on x
dual_curve_xlog.pl <- purrr::map2(
  .x = dual_var.comb$x, 
  .y = dual_var.comb$y,
      ~{ggplot() +
          geom_path(data = dual_cr.f, aes_string(x = sprintf("log(%s)", .y), y = .x, color = "cr"),
                    arrow = arrow(length = unit(c(rep(0, nrow(dual_cr.f) - 2), 0.25), "inches")),
                    size = 1.5) +
          geom_point(data = dual_cr.f %>% filter(row_number() %% 10 ==1), 
                     aes_string(x = sprintf("log(%s)", .y), y = .x, color = "cr"), size = 3) +
          labs(color = "Conversion rate",
            x = paste0("log10(", .y,")")) + # note the labels are reversed because we set x= y and y = x above
          scale_color_viridis_c(limits = c(0, 1)) +
          scale_y_continuous(labels = scientific) +
          theme_bw() +
          theme(plot.margin = unit(rep(0.3,4), "lines"),
                legend.position = "none")
        }
          )

## logged on y
dual_curve_ylog.pl <- purrr::map2(
  .x = dual_var.comb$x, 
  .y = dual_var.comb$y,
      ~{ggplot() +
          geom_path(data = dual_cr.f, aes_string(x = .y, y = sprintf("log(%s)", .x), color = "cr"),
                    arrow = arrow(length = unit(c(rep(0, nrow(dual_cr.f) - 2), 0.25), "inches")),
                    size = 1.5) +
          geom_point(data = dual_cr.f %>% filter(row_number() %% 10 ==1), 
                     aes_string(x = .y, y = sprintf("log(%s)", .x), color = "cr"), size = 3) +
          labs(color = "Conversion rate",
            y = paste0("log10(", .x,")")) + # note the labels are reversed because we set x= y and y = x above
          scale_color_viridis_c(limits = c(0, 1)) +
          scale_x_continuous(labels = scientific) +
          theme_bw() +
          theme(plot.margin = unit(rep(0.3,4), "lines"),
                legend.position = "none")
        }
          )


# both logged
dual_curve_log.pl <- purrr::map2(
  .x = dual_var.comb$x, 
  .y = dual_var.comb$y,
      ~{ggplot() +
          geom_path(data = dual_cr.f, aes_string(x = sprintf("log(%s)", .y), y = sprintf("log(%s)", .x), color = "cr"),
                    arrow = arrow(length = unit(c(rep(0, nrow(dual_cr.f) - 2), 0.25), "inches")),
                    size = 1.5) +
          geom_point(data = dual_cr.f %>% filter(row_number() %% 10 ==1), 
                     aes_string(x = sprintf("log(%s)", .y), y = sprintf("log(%s)", .x), color = "cr"), size = 3) +
          labs(color = "Conversion rate",
               x = paste0("log10(", .y,")"),
            y = paste0("log10(", .x,")")) + # note the labels are reversed because we set x= y and y = x above
          scale_color_viridis_c(limits = c(0, 1)) +
          theme_bw() +
          theme(plot.margin = unit(rep(0.3,4), "lines"),
                legend.position = "none")
        }
          )

# save all 4 plates
ggarrange(plotlist = dual_curve.pl, ncol = 2, nrow = 3, align = "hv")
ggsave(here("figures/plos-bio/dual_curve1.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")

ggarrange(plotlist = dual_curve_xlog.pl, ncol = 2, nrow = 3, align = "hv")
ggsave(here("figures/plos-bio/dual_curve2.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")

ggarrange(plotlist = dual_curve_ylog.pl, ncol = 2, nrow = 3, align = "hv")
ggsave(here("figures/plos-bio/dual_curve3.tiff"), units = "px", width = 1000, height =1000, scale = 1.5, dpi=300,  bg = "white")

ggarrange(plotlist = dual_curve_log.pl, ncol = 2, nrow = 3, align = "hv")
ggsave(here("figures/plos-bio/dual_curve4.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")
```


#============================================#
# real life disease curve
#============================================#
# execute code from report 10 to get final dataset
The following graphs will be made:
- R vs iRBC
- R log10 vs iRBC
- R vs iRBC log10
- R log10 vs iRBC log10

- G vs iRBC
- G log10 vs iRBC
- G vs iRBC log10
- G log10 vs iRBC log10

- R vs G
- R log10 vs G
- R vs G log10
- R log10 vs G log10

R on y-axis
```{r}
r_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = asex)) +
  geom_path(aes(y = RBC, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = asex)) +
  geom_path(aes(y = log10(RBC), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

r_ilog.dc <-ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = log10(asex))) +
  geom_path(aes(y = RBC, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = log10(asex))) +
  geom_path(aes(y = log10(RBC), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

G on y-axis
```{r}
g_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = gam, x = asex)) +
  geom_path(aes(y = gam, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

glog_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(gam), x = asex)) +
  geom_path(aes(y = log10(gam), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

g_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = gam, x = log10(asex))) +
  geom_path(aes(y = gam, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

glog_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(gam), x = log10(asex))) +
  geom_path(aes(y = log10(gam), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

R vs G
```{r}
r_g.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = gam)) +
  geom_path(aes(y = RBC, x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "Gametocyte per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_g.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = gam)) +
  geom_path(aes(y = log10(RBC), x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "Gametocyte per µL", y = "log10(RBC per µL)", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

r_glog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = log10(gam))) +
  geom_path(aes(y = RBC, x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(Gametocyte) per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_glog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = log10(gam))) +
  geom_path(aes(y = log10(RBC), x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(Gametocyte) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

# plot together
```{r}
ggarrange(r_i.dc, rlog_i.dc, r_ilog.dc, rlog_ilog.dc,
          g_i.dc, glog_i.dc, g_ilog.dc, glog_ilog.dc,
          r_g.dc, rlog_g.dc, r_glog.dc, rlog_glog.dc, 
          ncol = 4, nrow = 3, align = "hv", common.legend = T)
ggsave(here("figures/plos-bio/exp_disease-curve.tiff"), units = "px", width = 2250, height = 1500, scale = 2, dpi=300,  bg = "white")
```

#===========================================#
# static competition
#===========================================#
#------- heat map ---------------#
# calculate fitness difference for 20 days
```{r}
# get dynamics
static.ls <- list.files(path = here("data/ci_static/"), pattern = "*.parquet", full.names = T)
static.ls <- lapply(static.ls, read_parquet)

# get fitness at day 20 (optimized for 20 days)
static_fitness.ls <- mclapply(static.ls, function(x){
  x %>% filter(time == 20 & variable %in% c("tau_cum1", "tau_cum2"))
})
static_fitness.df <- do.call(rbind, static_fitness.ls)

static_fitness.df <- tidyr::pivot_wider(static_fitness.df, names_from = "variable", id_cols = c("id_1", "id_2")) %>% 
  group_by(id_1, id_2) %>% 
  mutate(fitness_difference = tau_cum1-tau_cum2)
write.csv(static_fitness.df, here("data/ci_static.csv"))
```

# import and process data
```{r}
# import in dataset
static.df <- read.csv(here("data/ci_static.csv"))
ez_label <- read.csv(here("data/ez_label.csv"))

# join with labelling
static.df2 <- static.df %>% 
  left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("id_1" = "id_ci")) %>% 
  left_join(select(ez_label, id_ci, label_ci_2 = label_ci), by = c("id_2" = "id_ci")) %>% 
  select(label_ci_1, label_ci_2, fitness_difference) %>% 
  mutate(label_ci_1 = gsub("log", "log10", label_ci_1),
         label_ci_2 = gsub("log", "log10", label_ci_2))

# get reverse order, which is simply invovles switching the cues around the multiplying the fitness by negative 1
static.df3 <- static.df2
names(static.df3) <- c("label_ci_2", "label_ci_1", "fitness_difference")
static.df3$fitness_difference <- static.df2$fitness_difference * -1

# join
static.df4 <- rbind(static.df2, static.df3)

# get mean
static.mean <- static.df4 %>% 
  distinct(label_ci_1, label_ci_2, .keep_all = T) %>% 
  group_by(label_ci_1) %>% 
  summarize(mean_fitness = mean(fitness_difference, na.rm = T))
  

# join static.df4 with mean for order sorting
static.df5 <- static.df4 %>% 
  left_join(static.mean, by = "label_ci_1") %>% 
  mutate(label_ci_1 = fct_reorder(label_ci_1, mean_fitness),
         label_ci_2 = fct_relevel(label_ci_2, levels(static.df5$label_ci_1)))
```

# plot
```{r}
# heatmap
static.pl1 <- ggplot(data = static.df5, aes(x = label_ci_2, y = label_ci_1, mean_fitness, fill = fitness_difference))+
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "#fc8d59", high = "#4575b4", mid = "white", 
   midpoint = 0, space = "Lab", lim = c(-0.95, 0.95), name="Fitness\ndifference") +
  theme_minimal() +  
  theme(legend.position = "left",
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
  axis.ticks = element_blank(),
  plot.margin=margin(r = -1, l = -1, unit = "pt")) + 
  labs(x = "Strain 2 cue", y = "Strain 1 cue") +
  coord_fixed()

# mean 
static.pl2 <- ggplot() +
  geom_bar(data =  static.mean, aes(y = fct_reorder(label_ci_1, mean_fitness), x = mean_fitness), stat = "identity") +
  labs(y = "", x = "Mean fitness\ndifference") +
  theme_bw() +
  theme(plot.margin = margin(l = 0),
       panel.grid.major = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
    axis.text.y=element_blank(),
    axis.ticks.y=element_blank())

ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
ggsave(here("figures/plos-bio/static_competition_a.tiff"), units = "px", width = 2250, height = 1500, scale = 1.2, dpi=300,  bg = "white")
```

#------ effect cue perception -------#
## logging
```{r}
# get non-logged pairings
static_nolog <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")

static_log <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

static_log.df <- left_join(
  select(static_nolog, cue_1, label_ci_2, log_1, None = fitness_difference),
  select(static_log, cue_1, label_ci_2, log_1, Log = fitness_difference),
  by = c("cue_1", "label_ci_2")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))
```

# combined
```{r}
static_nocomb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

static_comb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
static_comb.df <- left_join(
  select(static_nocomb, cue_1, label_ci_2, log_1, Self = fitness_difference),
  select(static_comb, cue_1, label_ci_2, log_1, Total = fitness_difference),
  by = c("cue_1", "log_1", "label_ci_2")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
```

# plot
```{r}
static_log.pl <- ggpaired(static_log.df, cond1 = "None", cond2 = "Log", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Not logged better" = "#fc8d59", "Logged better" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = -0.1)

static_comb.pl <- ggpaired(static_comb.df, cond1 = "Total", cond2 = "Self", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Total better" = "#fc8d59", "Self better" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = -0.2)

ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")
ggsave(here("figures/plos-bio/static_competition_b.tiff"), units = "px", width = 1000, height = 2000, scale = 1.2, dpi=300,  bg = "white")
```


#===========================================#
# invasion analysis
#===========================================#
# import in data (already 20 days )
```{r}
invade.df <- read.csv(here("data/ci_invasion.csv"))
```


# process data for invasion matrix
```{r}
invade.mat <- invade.df %>% 
  group_by(V1 = pmin(mut_id, res_id), V2 = pmax(mut_id, res_id)) %>% # group by cue competition, irregardless of order
  mutate(id_alt = paste0(V1, V2),
         invade = case_when(
           fitness > 0 ~ "invade",
           fitness < 0 ~ "not invade"
         )) %>% 
  group_by(id_alt) %>% 
  mutate(
    mut_is_V1 = case_when(
    mut_id == V1 ~ "V1_invade",
    mut_id != V1 ~ "V1_invaded")) %>% 
  arrange(id_alt) %>% 
  select(fitness, V1, V2, id_alt, invade, mut_is_V1) %>% 
  tidyr::pivot_wider(names_from = mut_is_V1, values_from = fitness) %>% 
  group_by(id_alt) %>% 
  mutate(V1_invade2 = gsub("NA", "", paste0(V1_invade, collapse = "")),
         V1_invaded2 = gsub("NA", "", paste0(V1_invaded, collapse = ""))) %>% 
  distinct(id_alt, .keep_all = T) %>% 
  mutate(
    category = case_when(
    V1_invade2 > 0 & V1_invaded2 > 0 ~ "Mutual invasion",
    V1_invade2 > 0 & V1_invaded2 < 0 ~ "Only strain 1 invasion",
    V1_invade2 < 0 & V1_invaded2 > 0 ~ "Only strain 2 invasion",
    V1_invade2 < 0 & V1_invaded2 < 0 ~ "Mutual non-invasion"
  )) %>% 
  select(V1, V2, invasion = category)

invade.df %>% filter(mut_id == "G-i_none")
invade.df %>% filter(res_id == "G-i_none")
```


```{r}
# for plotting, need to get all same cue vs same cue, which we will set to NA
invade.NA <- cbind.data.frame(`V1` = unique(invade.mat$V1),
      `V2` = unique(invade.mat$V1),
      invasion = NA)

invade.mat2 <- rbind(invade.mat, invade.NA)

# get label
invade.mat3 <- invade.mat2 %>% 
  left_join(select(ez_label, id_ci, V1_label = label_ci), by = c("V1" = "id_ci")) %>% 
  left_join(select(ez_label, id_ci, V2_label = label_ci), by = c("V2" = "id_ci")) %>% 
  mutate(V1_label = gsub("log", "log10", V1_label),
                                      V2_label = gsub("log", "log10", V2_label))


invade.mat4 <- rbind(
  select(invade.mat3, V1_label, V2_label, invasion),
  select(invade.mat3, V2_label = V1_label, V1_label = V2_label) %>% mutate(invasion = NA)) %>%
  mutate(
    invasion_2 = case_when(
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
    invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
  )) %>% 
  filter(!is.na(V1_label))

invade.mat4$V1_label <- factor(invade.mat4$V1_label, levels =  c("G log10", "G", "G1+G2", "I log10", "I", "I+Ig log10", "I+Ig", "I1+I2 log10", "I1+I2", "Ig log10", "Ig", "Ig1+Ig2", "R log10", "R", "sum log10", "sum"))
invade.mat4$V2_label <- factor(invade.mat4$V2_label, levels =  c("G", "G1+G2", "I log10", "I", "I+Ig log10", "I+Ig", "I1+I2 log10", "I1+I2", "Ig log10", "Ig", "Ig1+Ig2", "R log10", "R", "sum log10", "sum", "G log10"))
```


# plot invasion matrix
```{r}
invasion.pl1 <- ggplot(data = invade.mat4, aes(x = V2_label, y = V1_label, fill = invasion_2)) +
  geom_tile(color = "black") +
  theme_minimal() +  
  theme(legend.position = "right",
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
  axis.ticks = element_blank(),
  plot.margin=margin(r = 0)) + 
  labs(fill = "Invasibility", x = "Competing cue", y = "Reference cue") +
  scale_x_discrete(limits = rev) +
  scale_y_discrete(limits = rev) +
  scale_fill_manual(values = c("Competitive exclusion\nof another cue" = "#4575b4",
                               "Competitive exclusion\nby another cue" = "#fc8d59",
                               "Mutual invasion" = "#fee090"),
                    na.value = "white") +
  theme(legend.position = "none")
```

# create summary bar chart
```{r}
# create a stacked barchart for summary
## filter out na
invade.matalt <- invade.mat3 %>% na.exclude()

# get frquency from both sides. Note when grouping for V2, from the perspective of cue 2, scenarrio when strain 2 invade = strain 1 invade
invade.matalt1 <- invade.matalt %>% group_by(V1_label, invasion) %>% 
  summarize(frequency_1 = n())

invade.matalt2 <- invade.matalt %>%
  mutate(invasion_alt = case_when(
    invasion == "Only strain 1 invasion" ~ "Only strain 2 invasion",
    invasion == "Only strain 2 invasion" ~ "Only strain 1 invasion",
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Mutual non-invasion" ~ "Mutual non-invasion"
  )) %>% 
  group_by(V2_label, invasion_alt) %>% 
  summarize(frequency_2 = n())     

# full join and sum. has confirmed all of them add up to 14 
invade.matalt3 <- full_join(invade.matalt1, invade.matalt2, by = c("V1_label" = "V2_label", "invasion" = "invasion_alt"))

invade.matalt3[is.na(invade.matalt3)] <- 0
invade.matalt4 <- invade.matalt3 %>% 
  mutate(freq = frequency_1 + frequency_2) %>% 
  mutate(temp = case_when(
    invasion == "Only strain 1 invasion" ~ freq
  )) %>% 
  group_by(V1_label) %>% 
  mutate(invade_1_freq = max(temp, na.rm = T)) %>% 
  mutate(invasion_2 = case_when(
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
    invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
  ))
```



```{r}
invasion.pl2 <- ggplot() +
  geom_bar(data = invade.matalt4, aes(x = freq, y = reorder(V1_label, invade_1_freq), fill = forcats::fct_rev(factor(invasion_2, levels = c("Competitive exclusion\nof another cue", "Competitive exclusion\nby another cue", "Mutual invasion", "Mutual non-invasion")))), stat = "identity") +
  labs(x = "Frequency", fill = "Invasibility", y = "Cue") +
  theme_bw()  +
  scale_fill_manual(values = c("Competitive exclusion\nof another cue" = "#4575b4",
                               "Competitive exclusion\nby another cue" = "#fc8d59",
                               "Mutual invasion" = "#fee090")) +
  theme(text = element_text(size = 15))
```

# plot together
```{r}
ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggsave(here("figures/plos-bio/invasion_a.tiff"), units = "px", width = 2250, height = 1100, scale = 1.4, dpi=300,  bg = "white")
```

#---------------- invasion pairwise comparison-----------------#
## proces data
```{r}
# join invade df with label because I am lazy
invade.df2 <- invade.df %>% 
  left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("mut_id" = "id_ci"))

invade.df2 
```

# log
```{r}
# get non-logged pairings
invade_nolog <- invade.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")


invade_log <- invade.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

invade_log.df <- left_join(
  select(invade_nolog, cue_1, res_id, log_1, None = fitness),
  select(invade_log, cue_1, res_id, log_1, Log = fitness),
  by = c("cue_1", "res_id")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))

invade_log
```

# combined
```{r}
invade_nocomb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

invade_comb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
invade_comb.df <- left_join(
  select(invade_nocomb, cue_1, res_id, log_1, Self = fitness),
  select(invade_comb, cue_1, res_id, log_1, Total = fitness),
  by = c("cue_1", "log_1", "res_id")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
invade_comb.df
```

# plot
```{r}
invade_log.pl <- ggpaired(invade_log.df, cond1 = "None", cond2 = "Log", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Not logged better" = "#fc8d59", "Logged better" = "#4575b4"))+
  stat_compare_means(paired = TRUE, hjust = -0)

invade_comb.pl <- ggpaired(invade_comb.df, cond1 = "Total", cond2 = "Self", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Total better" = "#fc8d59", "Self better" = "#4575b4"))+
  stat_compare_means(paired = TRUE, hjust = -0)

ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggsave(here("figures/plos-bio/invasion_b.tiff"), units = "px", width = 2250, height = 900, scale = 1.2, dpi=300,  bg = "white")
```


#===========================================#
# Cue performance across single, co-infection, static, and invasion
#===========================================#
```{r}
# import in all the ranks
si_opt.df <- read.csv(here("data/si_opt.csv"))
ci_opt.df <- read.csv(here("data/ci_opt.csv"))
static.df <- read.csv(here("data/ci_static.csv"))
invasion.df <- read.csv(here("data/ci_invasion.csv"))

# calculate mean fitness
static.mean ## already calculated
invasion.mean <- rbind(select(invasion.df, cue = mut_id, fitness),
      select(invasion.df %>% mutate(fitness2 = fitness * -1), cue = res_id, fitness = fitness2)) %>% 
  group_by(cue) %>% 
  summarize(mean_fitness = mean(fitness)) %>% 
  arrange(desc(mean_fitness))

# calculate ranks
si.rank <- si_opt.df %>% 
  left_join(select(ez_label, id_ci, id_si, label = ez_label_si), by = c("id" = "id_si")) %>% 
  mutate(rank = dense_rank(-fitness_20),
         model = "Single infection") %>% 
  select(rank, model, id = id_ci, label)

ci.rank <- ci_opt.df %>% 
  left_join(select(ci_fitness.df, value, label), by = "label") %>% 
  left_join(select(ez_label, id_ci, id_si, ez_label), by = c("id" = "id_ci")) %>% 
  mutate(rank = rank(-value),
         model = "Co-infection") %>% 
  select(rank, model, id, label = ez_label)

static.rank <- static.mean %>% 
  mutate(label_ci = gsub("log10", "log", label_ci_1)) %>% 
  left_join(select(ez_label, id_ci, label_ci, ez_label), by = c("label_ci" = "label_ci")) %>% 
  mutate(rank = rank(-mean_fitness),
         model = "Static mixed genotype") %>% 
  select(rank, model, id = id_ci, label = ez_label)

invasion.rank <- invasion.mean %>% 
  left_join(select(ez_label, id_ci, ez_label), by = c("cue" = "id_ci")) %>% 
  mutate(rank = rank(-mean_fitness),
         model = "Invasive mixed genotype") %>% 
  select(rank, model, id = cue, label = ez_label) %>% 
  mutate(label = gsub("\n", " ", paste0("   ", label)))

static.rank
# concatenate
fitness.rank <- rbind(
  si.rank, ci.rank, static.rank, invasion.rank
) %>% 
  mutate(model = fct_relevel(model, c("Single infection", "Co-infection", "Static mixed genotype", "Invasive mixed genotype")))

# highlight the good ones
fitness.rank_good <- fitness.rank %>% 
  filter(id %in% c("Ig-i_log", "I-i+Ig-i_log", "sum_log", "G-i_log"))
```

# plot
```{r}
library(ggbump)

ggplot() +
  geom_bump(data = fitness.rank, aes(x = model, y = rank, group = id), color = "grey") +
  geom_point(data = fitness.rank, aes(x = model, y = rank, group = id), color = "grey", size = 3) +
  geom_bump(data = fitness.rank_good, aes(x = model, y = rank, group = id, color = id)) +
  geom_point(data = fitness.rank_good, aes(x = model, y = rank, group = id, color = id), size = 3) +
  geom_text(data = invasion.rank, aes(x = model, y = rank, label = label), size = 3.5, hjust = 0, inherit.aes = F) +
  scale_color_viridis_d() +
  theme_minimal() +
  coord_cartesian(clip = "off") +
  scale_y_reverse() +
  expand_limits(x = 5.5) +
  theme(legend.position = "none") +
  labs(x = "Model", y = "Mean fitness rank")
ggsave(here("figures/plos-bio/fitness_rank.tiff"), units = "px", width = 2250, height = 1500, scale = 1, dpi=300,  bg = "white")
```


#-=====================#
# Partitioning best cue
#=====================-#
#------- single infection -----------#
# redo some optimization (lower fitness in no R than default)
```{r}
source(here("functions/chabaudi_si_clean_R.R"))
source(here("functions/chabaudi_si_clean_N.R"))
# I none
cl <- makeCluster(detectCores()); setDefaultCluster(cl = cl)
I_no_R <- optimParallel(
    par = rep(0.5,4), # start at 0.5x4
    fn = chabaudi_si_clean_R, 
    control = list(trace = 6, fnscale = -1),
    immunity = "tsukushi",
    parameters = parameters_tsukushi,
    time_range = seq(0, 20, by = 1e-3),
    cue_range =  seq(0, 6*(10^6), by = (6*(10^6))/5000),
    cue = "I",
    log_cue = "none",
    solver = "vode")
stopCluster(cl)
# 0.144021 -43.1046 2030.27 -524.686 
# 8.69589
```

# import and process data
```{r}
# import in data
si_partition.ls <- list.files(path = here("data/partition/si/"), pattern = "*.csv", full.names = T)
si_partition.ls <- lapply(si_partition.ls, read.csv)
si_partition.df <- do.call(rbind, si_partition.ls)

# combine with si fitness (default)
si_partition.df <- si_partition.df %>% left_join(select(si_fitness.df, id, fitness = value), by = "id")

# make longer
si_partition.df2 <- tidyr::pivot_longer(si_partition.df, c(fitness_R, fitness_N, fitness_W, fitness))

# calculate coefficient of variation. Also rename
si_partition.df2 <- si_partition.df2 %>% 
  group_by(name) %>% 
  mutate(cv = sd(value)/mean(value)*100,
         mean = mean(value),
         category = case_when(
           name == "fitness_R" ~ "No RBC limitation",
           name == "fitness_W" ~ "No targeted immunity",
           name == "fitness_N" ~ "No indiscriminate\nimmunity",
           name == "fitness" ~ "Default"
         ))

```

# plot
```{r}
library(ungeviz)
# raw fitness
si_partition.pl1 <- ggplot() +
  geom_vpline(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = mean, group = category, color = category), show.legend = F, size = 1) +
  geom_point(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value), size = 2, alpha = 0.7) +
  geom_line(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value, group = id), alpha = 0.2) +
  labs(x = "Fitness", y = "Conditions") +
  theme_bw()

# coefficient of variation
si_partition.pl2 <- ggplot() +
  geom_bar(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = cv), stat = "identity") +
  labs(x = "Coefficient of\nvariation (%)", y = "") +
  theme_bw() +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())

si_partition.pl <- ggarrange(si_partition.pl1, si_partition.pl2, widths = c(1, 0.3), align = "h")
si_partition.pl

ggsave(here("figures/plos-bio/partition_fitness.tiff"), width = 7, height = 4)
```

#------- consequences of no targeted immunity ------------#
# get dynamics of no targeted immunity
```{r}
get_dyn <- function(df){
  
  source(here("functions/chabaudi_si_clean_W.R"))
  id <- df$id
  cue <- df$cue
  log <- df$log
  par <- c(df$var_W1, df$var_W2, df$var_W3, df$var_W4)
  cue_range <- seq(df$low, df$high, by = df$by)
  
  # get dynamics
  dyn <- chabaudi_si_clean_W(
    parameters_cr = par,
    immunity = "tsukushi",
    parameters = parameters_tsukushi,
    time_range = seq(0, 20, by = 1e-3),
    cue_range =  cue_range,
    cue = cue,
    log_cue = log,
    solver = "vode",
    dyn = T
  )
  
  # combine
  dyn2 <- cbind(dyn, id = id, cue = cue, log = log)
  
  write_parquet(dyn2, paste0(here("data/partition/si_dyn/"), id, "_noW_dyn.parquet"))
  
}
```

# get df to run
```{r}
# join with cue_range
cue_range_si.df <- read.csv(here("data/cue_range_si.csv"))
si_partition.df3 <- si_partition.df %>% left_join(select(cue_range_si.df, low, high, by, id), "id")

# lapply loop
si_partition.ls <- split(si_partition.df3, seq(nrow(si_partition.df3)))
mclapply(si_partition.ls, get_dyn)
```

# process dataframe
```{r}
# import in dataframe
no_W.ls <- list.files(here("data/partition/si_dyn/"), pattern = "*noW_dyn.parquet", full.names = T)
no_W.df <- lapply(no_W.ls, read_parquet)
no_W.df <- do.call(rbind, no_W.df)

# combine with ez label
ez_label <- read.csv(here("data/ez_label.csv"))
no_W.df <- left_join(no_W.df, ez_label, by = c("id" = "id_si"))

# get conversion rate 
no_W.cr <- no_W.df %>% filter(variable == "cr")
no_W.I <- no_W.df %>% filter(variable == "I")

# get default conversion rate dynamics
si_dyn.df <- left_join(si_dyn.df, ez_label, by = c("id" = "id_si"))
si_dyn.cr <- si_dyn.df %>% filter(variable == "cr")
si_dyn.I <- si_dyn.df %>% filter(variable == "I")
```

# plot conversion rate
mechanism: targeted immunity led to lower parasite density in the initial stages, which prevents parasites from making the switch from no conversion rate to high conversion rate. When parsite density undergoes drastic increase at the beginning due to lower immunity, this presents a higher degree of signal that allows parasite to make the switch appropriately
```{r}
partition_cr.pl <- ggplot() +
  geom_line(data = no_W.cr, aes(x = time, y= value, color = "No targeted immunity")) +
  geom_line(data = si_dyn.cr, aes(x = time, y= value, color = "Default")) +
  facet_wrap(~ez_label_si, ncol = 5) +
  xlim(0, 20) +
  geom_vline(xintercept = 7) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Condition") +
  theme_bw()

no_W.cr
```

#----- cue state --------------#

# function to get cue states
```{r}
# function to get cue states
get_cue_state <- function(df){
  cue <- trimws(gsub("_log|_none", "", unique(df$id)))
  if(cue != "I+Ig"){
  df2 <- df %>% filter(variable == cue)
  if(str_detect(unique(df$id), "log")){
    df2 <- df2 %>% 
      mutate(value = log10(value))
  }
  }
  
  if(cue == "I+Ig"){
    df2 <- df %>% filter(variable %in% c("I", "Ig")) %>% 
      group_by(time) %>% 
      mutate(value = sum(value))
    
    if(str_detect(unique(df$id), "log")){
    df2 <- df2 %>% 
      mutate(value = log10(value))
  }
  }
  
  df2$value[df2$value == -Inf] <- 0
  
  write_parquet(df2, paste0(here("data/partition/si_default_state/"), unique(df$id), "_noW_state.parquet"))
}
```

# run function
```{r}
# split dynamics based on id
no_W.split <- split(no_W.df, no_W.df$id)

# run function
mclapply(no_W.split, get_cue_state)

# get dataframe
no_W.state <- list.files(here("data/partition/si_state/"), pattern = "*.parquet", full.names = T)
no_W.state <- lapply(no_W.state, read_parquet)
no_W.state <- do.call(rbind, no_W.state)
no_W.state$value[no_W.state$value < 0] <- 0

# get same for si infection
default.split <- split(si_dyn.df, si_dyn.df$id)
mclapply(default.split, get_cue_state)
default.state <- list.files(here("data/partition/si_default_state/"), pattern = "*.parquet", full.names = T)
default.state <- lapply(default.state, read_parquet)
default.state <- do.call(rbind, default.state)
default.state$value[default.state$value < 0] <- 0

# manually correct non-logging
I_Ig.corr <- no_W.state %>% filter(id == "I+Ig_log") %>% 
  mutate(value = log10(value))
I_Ig.corr$value[I_Ig.corr$value < 0] <- 0

no_W.state2 <- no_W.state %>% filter(id != "I+Ig_log")
no_W.state2 <- no_W.state2 %>% rbind(no_W.state2, I_Ig.corr)
```

# plot
absence of targeted immunity led to drastic increase in parasite density in early phases of infection. This produces high signal intensity for parasite and host-based cues, especially non-logged ones, which allows for state differentation. While this can be viewed as a modelling artifiact, it should be noted that the logged cues seldom changed as these changes in early infection did little to alter the actual early signal intensity sensed by the parasite. In an environment where there is heterogeneity in host response, and thus, signal, logging allows for parasites to adapt optimal strategy whereas non-logged cues must contend with sensitivity to immunity.
```{r}
# function to individually plot stuff
plot_state <- function(df1, df2){
  
  # plot state dynamics
  state_pl <- ggplot() +
  geom_line(data = df1, aes(x = time, y = value, color = name, group = name)) +
  facet_wrap(~ez_label_si, scales = "free") +
  xlim(1,20) +
  theme_bw() +
  theme(legend.position="none") +
  labs(x = "", y = "Cue") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
  scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59"))
  
  # plot conversion rate dynamics
  cr_pl <- ggplot() +
  geom_raster(data = df2, aes(x = time, y = name, fill = value)) +
  xlim(1,20) +
  theme_bw() +
    labs(x = "Time (days)") +
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "none") +
    scale_fill_viridis_c(lim = c(0, 1))
  
  # arrange
  ggarrange(state_pl, cr_pl, ncol = 1, nrow = 2, align = "v", heights = c(1, 0.4))
  ggsave(paste0(here("figures/plos-bio/partition/"), unique(df1$id), ".tiff"), width = 4.5, height = 3.5)
}
```

# split
```{r}
# combine state
noW_default.state <- left_join(
  select(no_W.state2, time, `No targeted\nimmunity` = value, id, ez_label_si), 
  select(default.state %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))

noW_default.state2 <- tidyr::pivot_longer(noW_default.state, c(`No targeted\nimmunity`, `Default`))
# combine conversion raster
noW_default.cr <- left_join(
  select(no_W.cr, time, `No targeted\nimmunity` = value, id, ez_label_si), 
  select(si_dyn.cr %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))
noW_default.cr2 <- tidyr::pivot_longer(noW_default.cr, c(`No targeted\nimmunity`, `Default`))

# split
noW_default_state.ls <- split(noW_default.state2, noW_default.state2$id)
noW_default_cr.ls <- split(noW_default.cr2, noW_default.cr2$id)

# run function
mapply(plot_state, noW_default_state.ls, noW_default_cr.ls)
```

#-------- reaction norms of default vs optimized ------------#
# get reaction norm and rug data
```{r}
source(here("functions/par_to_df.R"))

# Gametocyte
g_log.rn <- par_to_df(par = c(1.211521,	-3.936778,	-1.312944,	-1.285713), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))
g_log.rn2 <- par_to_df(par = c(1.393860539,	-4.253007616,	-0.313947029,	-2.000857344), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))

g.rn <- par_to_df(par = c(0.04061288,	-9.31445958,	74.13015506,	-431.5984364), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))
g.rn2 <- par_to_df(par = c(0.541729073,	-3.904616443,	0.87487412,	-0.694177021), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))

# I+Ig
I_Ig_log.rn <- par_to_df(par = c(3.594042,	4.157744,	-13.530672,	2.599905), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))
I_Ig_log.rn2 <- par_to_df(par = c(63.71893822,	-87.77671601,	-56.55475514,	-66.02209549), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))

I_Ig.rn <- par_to_df(par = c(0.3159297,	-46.1104558,	1250.752908,	-6.1982093), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))
I_Ig.rn2 <- par_to_df(par = c(0.731982784,	-21.69799449,	149.7841876,	17.02551769), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))

# convert log to non-logged scale
g_log.rn$cue_range <- 10^(g_log.rn$cue_range)
g_log.rn2$cue_range <- 10^(g_log.rn2$cue_range)
I_Ig_log.rn$cue_range <- 10^(I_Ig_log.rn$cue_range)
I_Ig_log.rn2$cue_range <- 10^(I_Ig_log.rn2$cue_range)

# get rug
g_log.rug <- default.state %>% 
  filter(label_si == "G log") %>% 
  mutate(value = 10^value) %>% 
  select(label_si, value)

g_log.rug2 <- no_W.state %>% 
  filter(label_si == "G log") %>% 
  mutate(value = 10^value) %>% 
  filter(value <= 6*(10^4)) %>% 
  select(label_si, value)

I_Ig_log.rug <- default.state %>% 
  filter(label_si == "I+Ig log") %>% 
  select(label_si, value)

I_Ig_log.rug2 <- no_W.state %>% 
  filter(label_si == "I+Ig log") %>% 
  select(label_si, value)

g.rug <- default.state %>% 
  filter(label_si == "G") %>% 
  select(label_si, value)

g.rug2 <- no_W.state %>% 
  filter(label_si == "G" & value <= 6*(10^4)) %>% 
  select(label_si, value)

I_Ig.rug <- default.state %>% 
  filter(label_si == "I+Ig") %>% 
  select(label_si, value)

I_Ig.rug2 <- no_W.state %>% 
  filter(label_si == "I+Ig") %>% 
  select(label_si, value)

# get rug limits
rug_lim <- rbind(g_log.rug,
                 g_log.rug2,
                 I_Ig_log.rug,
                 I_Ig_log.rug2,
                 g.rug,
                 g.rug2,
                 I_Ig.rug,
                 I_Ig.rug2) %>% 
  group_by(label_si) %>% 
  summarize(max = max(hablar::s(value), na.rm = T),
            min = min(hablar::s(value), na.rm = T))

# combine and filter
rn <- rbind(
  cbind(g_log.rn, label_si = "G log", condition = "Default"),
  cbind(g_log.rn2, label_si = "G log", condition = "No targeted\nimmunity"),
  cbind(g.rn, label_si = "G", condition = "Default"),
  cbind(g.rn2, label_si = "G", condition = "No targeted\nimmunity"),
  cbind(I_Ig_log.rn, label_si = "I+Ig log", condition = "Default"),
  cbind(I_Ig_log.rn2, label_si = "I+Ig log", condition = "No targeted\nimmunity"),
  cbind(I_Ig.rn, label_si = "I+Ig", condition = "Default"),
  cbind(I_Ig.rn2, label_si = "I+Ig", condition = "No targeted\nimmunity")
) %>% 
  left_join(rug_lim, by = "label_si") %>% 
  group_by(label_si) %>% 
  filter(cue_range <= max & cue_range >= min)

# combine rug
rug <- rbind(cbind(g_log.rug, condition = "Default"),
             cbind(g_log.rug2, condition = "No targeted\nimmunity"),
             cbind(g.rug, condition = "Default"),
             cbind(g.rug2, condition = "No targeted\nimmunity"),
             cbind(I_Ig_log.rug, condition = "Default"),
             cbind(I_Ig_log.rug2, condition = "No targeted\nimmunity"),
             cbind(I_Ig.rug, condition = "Default"),
             cbind(I_Ig.rug2, condition = "No targeted\nimmunity"))

# cobine with ezlabel
rn2 <- rn %>% left_join(ez_label, by = "label_si")
rug2 <- rug %>% left_join(ez_label, by = "label_si")

# filter rug
default.rug <- rug2 %>% filter(condition == "Default")
no.rug <- rug2 %>% filter(condition == "No targeted\nimmunity")
```

# plot
```{r}
ggplot() +
  geom_line(data = rn2, aes(x = cue_range, y = cr, color = condition)) +
  geom_rug(data = default.rug, aes(x = value, color = condition), sides = "b") +
  geom_rug(data = no.rug, aes(x = value, color = condition), sides = "t") +
  facet_wrap(~fct_relevel(ez_label_si, c("Gametocyte log10", "Gametocyte", "Asexual&sexual\niRBC log10", "Asexual&sexual iRBC")), scales = "free_x") +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE)) +
  theme_bw() +
  scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59")) +
  ylim(0, 1.1) +
  labs(x = "Cue range", y = "Conversion rate", color = "Condition")

ggsave(here("figures/plos-bio/partition_rn.tiff"), width = 7.5, height = 6)
```

# get conversion rate legend
```{r}
noW_default.cr %>% filter(id == "G_log") %>% 
ggplot() +
  geom_raster(aes(x = time, y = id, fill = Default)) +
  xlim(1,20) +
  theme_bw() +
    labs(x = "Time (days)",
         fill = "Conversion rate") +
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank(),) +
    scale_fill_viridis_c(lim = c(0, 1))

ggsave(here("figures/plos-bio/cr_legend.tiff"))
```

#================================#
# Disease curves for single, co-infection, and invasion
#===============================#
# get data for disease curves
```{r}
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 

# co-infection dynamics (mon-cue)
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

# dual cue dynamics
dual_dyn.df <- read_parquet(here("data/dual_cue_dyn/dual_cue_dyn.parquet"))
```

#------- single cue comparison ---------------#
# process data
```{r}
# get classification
si_cue.dv <- si_fitness.df %>% 
  mutate(classification = case_when(
    value > 9.2 ~ "High-performing",
    value <= 9.2 ~ "Poor-performing"
  ))

# process dynamics -> turn skinny
si_dc.df <- si_dyn.df %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R") %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(total = I+Ig)

# join with classificaiton
si_dc.df2 <- si_dc.df %>% left_join(select(si_cue.dv, id, classification), by = "id")
si_cue.dv
# split into top erforming and poor-performing cues
si_dc.high <- si_dc.df2 %>% filter(classification == "High-performing")
si_dc.poor <- si_dc.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
si_dc.high <- si_dc.high %>% left_join(ez_label %>% distinct(label_si, .keep_all = T), by = c("id" = "id_si"))

#write_parquet(si_dc.high, here("data/disease_curve/si_dc_high.parquet"))
#write_parquet(si_dc.poor, here("data/disease_curve/si_dc_poor.parquet"))
```

# plot
```{r}
si_dc.poor <- read_parquet(here("data/disease_curve/si_dc_poor.parquet"))
si_dc.high <- read_parquet(here("data/disease_curve/si_dc_high.parquet"))

# plot
si_dc.pre <- ggplot() +
  geom_path(data = si_dc.poor, aes(x= total, y = R, group = id), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Single infection\ngood performing cues", x = "Asexual & sexual iRBC per µL", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

si_dc.pl <- si_dc.pre +
  geom_point(data = si_dc.high %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 3) +
  geom_path(data = si_dc.high, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb"))  +
  theme(legend.position = "top") +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
```

#---------- co-infection monocue -------------#
```{r}
# get relevent variables
ci_dc.df <- ci_dyn.df %>% 
  filter(variable == "I1" | variable == "Ig1" | variable == "R")

# morph into skinny format
ci_dc.df <- tidyr::pivot_wider(ci_dc.df, names_from = variable, values_from = value, id_cols = c(time, label)) %>% 
  mutate(total = I1+Ig1)

# good cue bad cue
ci_cue.dv <- ci_fitness.df %>% 
  mutate(classification = case_when(
    value > 2.25 ~ "High-performing",
    value <= 2.25 ~ "Poor-performing"
  ))

# join with classificaiton
ci_dc.df2 <- ci_dc.df %>% left_join(ci_cue.dv, by = "label")

# split into top erforming and poor-performing cues
ci_dc.high <- ci_dc.df2 %>% filter(classification == "High-performing")
ci_dc.poor <- ci_dc.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
ci_dc.high2 <- ci_dc.high %>% left_join(ez_label, by = c("label" = "label_ci"))

#write_parquet(ci_dc.high2, here("data/disease_curve/ci_dc_high.parquet"))
#write_parquet(ci_dc.poor, here("data/disease_curve/ci_dc_poor.parquet"))
```

# plot
```{r}
ci_dc.poor <- read_parquet(here("data/disease_curve/ci_dc_poor.parquet"))
ci_dc.high2 <- read_parquet(here("data/disease_curve/ci_dc_high.parquet"))

# plot
ci_dc.pre <- ggplot() +
  geom_path(data = ci_dc.poor, aes(x= total, y = R, group = label), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Co-infection\ngood performing cues", x = "Asexual & sexual iRBC per µL", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

ci_dc.pl <- ci_dc.pre +
  geom_point(data = ci_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 3) +
  geom_path(data = ci_dc.high2, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb")) +
  theme(legend.position = "top") +
  guides(color=guide_legend(nrow=2,byrow=TRUE))

```

#--------- dual cue --------------------------#
# process data
```{r}
# turn skinny
dual_dc.df <- dual_dyn.df %>% 
  mutate(label_alt = paste(label, "+" , label_b)) %>% 
  select(label_alt, time, variable, value) %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R") %>% 
  distinct(label_alt, time, variable, .keep_all = T)

dual_dyn.df

dual_dc.df2 <- dual_dc.df %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, label_alt)) %>%
  mutate(total = I+Ig)

write_parquet(dual_dc.df2, here("data/disease_curve/dual_dc.parquet"))

# good dual cue -> list of good performing dual cues that encompass wide variety of cues
selected_dual_cue <- c("R log + I log", "R + Ig log", "G log + I log", "G log + Ig log", "Ig + I log")
bad_dual_cue <- c("G + I", "R + Ig", "R log + Ig", "G + R", "G + R log", "G + Ig", "Ig + I", "R + I", "R log + I")

# get classification -> R log10 + I log10 as the only good one
dual_dc.high <- dual_dc.df2 %>% filter(label_alt %in% selected_dual_cue) %>% 
  mutate(label_alt = gsub("log", "log10", label_alt))
dual_dc.poor <- dual_dc.df2 %>% filter(label_alt %in% bad_dual_cue) %>% 
  mutate(label_alt = gsub("log", "log10", label_alt))
#write_parquet(dual_dc.high, here("data/disease_curve/dual_dc_high.parquet"))
#write_parquet(dual_dc.poor, here("data/disease_curve/dual_dc_poor.parquet"))

```

# plot
```{r}
dual_dc.high <- read_parquet(here("data/disease_curve/dual_dc_high.parquet"))
dual_dc.poor <- read_parquet(here("data/disease_curve/dual_dc_poor.parquet"))

dual_dc.high2 <- dual_dc.high %>% 
  filter(label_alt == "R log10 + I log10")

# add 
dual_dc.pre <- ggplot() +
  geom_path(data = dual_dc.poor, aes(x= total, y = R, group = label_alt), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "High-performing\ndual cues per µL", x = "Asexual & sexual iRBC", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)


dual_dc.pl <- dual_dc.pre +
  geom_point(data = dual_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, shape = label_alt), color = "#4575b4", size = 3) +
  geom_path(data = dual_dc.high2, aes(x= total, y = R, group = label_alt), color = "#4575b4", size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme(legend.position = "none") +
  guides(color = guide_legend(override.aes = list(size = 0.1)))
```

#--------- co-infection static ----------------#
```{r}
# import in dynamics data
static_dyn.ls <- list.files(path = here("data/ci_static/"), pattern = "*.parquet", full.names = T)
static_dyn.ls <- lapply(static_dyn.ls, read_parquet)

# filter variable and transform
static_dyn.ls2 <- mclapply(static_dyn.ls, function(x){
  x %>% 
    filter(variable == "I1" | variable == "Ig1" | variable == "I2" | variable == "Ig2" | variable == "R") %>% 
    mutate(id_alt = paste(id_1, id_2)) %>% 
    tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, id_alt)) %>%
  mutate(total1 = I1+Ig1, total2 = I2+Ig2)
})

static_dc.df <- do.call(rbind, static_dyn.ls2)
static_dc.df <- static_dc.df %>% 
  mutate(id_1 = gsub(" .*", "", id_alt),
         id_2 = gsub(".* ", "", id_alt)) %>% 
  filter(id_1 != id_2)
#write_parquet(static_dc.df, here("data/disease_curve/static_dc.parquet"))
```

# further processing
```{r}
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
# get winners and losers
## import in fitness
static_fitness.df <- read.csv(here("data/ci_static.csv"))
## get winner situation
static_fitness.df2 <- static_fitness.df %>% 
  filter(id_1 != id_2) %>% 
  mutate(winning_id = case_when(
    fitness_difference > 0 ~ id_1,
    fitness_difference< 0 ~ id_2
  ),
  losing_id = case_when(
    fitness_difference < 0 ~ id_1,
    fitness_difference> 0 ~ id_2
  ))

# left join
static_dc.df2 <- static_dc.df %>% 
  left_join(select(static_fitness.df2, id_1, id_2, winning_id, losing_id, fitness_difference), by = c("id_1", "id_2"))

# get winner-loser difference in terms of I+Ig also filter out to onyl very strong fitness difference
static_dc.df3 <- static_dc.df2 %>% 
  mutate(total_diff = case_when(
    fitness_difference > 0 ~ total1-total2,
    fitness_difference< 0 ~ total2-total2
  ),
  total_winner = case_when(
    fitness_difference > 0 ~ total1,
    fitness_difference< 0 ~ total2
  ),
  total_loser = case_when(
    fitness_difference > 0 ~ total2,
    fitness_difference< 0 ~ total1
  )) %>% 
  filter(abs(fitness_difference) > 0.5)
```

# plot
```{r}
static_dc.pl <- ggplot() +
  geom_path(data = static_dc.df3, aes(x= total_winner, y = R, group = id_alt, color = "Winner"), alpha = 0.5, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  geom_path(data = static_dc.df3, aes(x= total_loser, y = R, group = id_alt, color = "Loser"),
          alpha = 0.5,arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Status", x = "Asexual & sexual iRBC", y = "RBC") +
  scale_color_manual(values=c("Winner" = "#4575b4","Loser"= "#fc8d59"))  +
  theme_bw() + 
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1))
```


#---------co-infection invasion ---------------#
# get invasion dynamic
```{r}
# get invasion df
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))

# get cue range
ci_cue_range <- read.csv(here("data/cue_range_ci.csv"))
invasion_fitness.df2 <- invasion_fitness.df %>% 
  left_join(select(ci_cue_range, id, mut_cue = cue, mut_low = low, mut_high = high, mut_by = by), by = c("mut_id"= "id")) %>% 
  left_join(select(ci_cue_range, id, res_cue = cue, res_low = low, res_high = high, res_by = by), by = c("res_id"= "id"))
```

# function to get dynamic
```{r}
get_invasion_dyn <- function(df){
  # get cues
  mut_cue <- df$mut_cue
  res_cue <- df$res_cue
  
  # get info of cues (for co infection)
  if(stringr::str_detect(mut_cue, "-i")){mut_cue = gsub("*-i", "1", mut_cue)}
  if(stringr::str_detect(mut_cue, "-i", negate = T)){mut_cue = mut_cue}
  if(stringr::str_detect(res_cue, "-i")){res_cue = gsub("*-i", "2", res_cue)}
  if(stringr::str_detect(res_cue, "-i", negate = T)){res_cue = res_cue}
  
  # get log
  mut_log <- ifelse(stringr::str_detect(df$mut_id, "log"), "log10", "none")
  res_log <- ifelse(stringr::str_detect(df$res_id, "log"), "log10", "none")
  
  # get parameters
  mut_par <- c(df$mut_var1_opt, df$mut_var2_opt, df$mut_var3_opt, df$mut_var4_opt)
  res_par <- c(df$res_var1, df$res_var2, df$res_var3, df$res_var4)
  
  # get cue range
  mut_cue_range <- seq(df$mut_low, df$mut_high, by = df$mut_by)
  res_cue_range <- seq(df$res_low, df$res_high, by = df$res_by)
  
  # get dynamics of co infection
  ci_dyn <- chabaudi_ci_clean(
    parameters_cr_1 = mut_par,
    parameters_cr_2 = res_par,
                  immunity = "tsukushi",
                  parameters = parameters_tsukushi,
                  cue_1 = mut_cue,
                  cue_2 = res_cue,
                  cue_range_1 = mut_cue_range,
                 cue_range_2 = res_cue_range,
                log_cue_1 = mut_log,
                log_cue_2 = res_log,
                solver = "vode",
                time_range = seq(0, 30, 0.001),
    dyn = T)
  
  # append label to all df
 ci_dyn2 <- cbind(ci_dyn, mut_id = df$mut_id, res_id = df$res_id)
  
  # write
 write_parquet(ci_dyn2, paste0(here("data/ci_invasion_dyn/"), df$mut_id, "-", df$res_id, ".parquet"))
}
```


# run dynamic funciton
```{r}
# get function and parameters
source(here("functions/chabaudi_ci_clean.R"))
parameters_tsukushi <- c(R1 = 8.89*(10^6), # slightly higher
                lambda = 3.7*(10^5),
                mu = 0.025, 
                p = 8*(10^-6), # doubled form original
                alpha = 1, 
                alphag = 2, 
                beta = 5.721, 
                mum = 48, 
                mug = 4, 
                I0 = 43.85965, 
                Ig0 = 0, 
                a = 150, 
                b = 100, 
                sp = 1,
                psin = 16.69234,
                psiw = 0.8431785,
                phin = 0.03520591, 
                phiw = 550.842,
                iota = 2.18*(10^6),
                rho = 0.2627156)
# split
invasion.ls <- split(invasion_fitness.df2, seq(nrow(invasion_fitness.df2)))

# run function
mclapply(invasion.ls, get_invasion_dyn, mc.cores = 4)
```


# process data
```{r}
# import in invasion dynamics
invasion_dyn.ls <- list.files(path = here("data/ci_invasion_dyn"), pattern = "*.parquet", full.names = T)
invasion_dyn.ls <- lapply(invasion_dyn.ls, read_parquet)

# filter and so on
invasion_dyn.ls2 <- mclapply(invasion_dyn.ls[167:177], mc.cores = 4, function(x){
  x2 <- x %>% 
    filter(variable == "I1" | variable == "Ig1" | variable == "I2" | variable == "Ig2" | variable == "R") %>% 
    mutate(id_alt = paste(mut_id, res_id)) %>% 
    select(id_alt, time, variable, value) %>% 
    tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, id_alt)) %>%
  mutate(total1 = I1+Ig1, total2 = I2+Ig2)
  
  write_parquet(x2, paste0(here("data/disease_curve/ci_invasion/"), unique(x2$id_alt), "_dc.parquet"))
})

# fetch data
invasion_dyn.ls2 <- list.files(path = here("data/disease_curve/ci_invasion"), pattern = "*.parquet", full.names = T)
invasion_dyn.ls2 <- lapply(invasion_dyn.ls2, read_parquet)
invasion_dc.df <- do.call(rbind, invasion_dyn.ls2)
invasion_dc.df <- invasion_dc.df %>% 
  mutate(mut_id = gsub(" .*", "", id_alt),
         res_id = gsub(".* ", "", id_alt)) %>% 
  filter(mut_id != res_id)
#write_parquet(invasion_dc.df, here("data/disease_curve/invasion_dc.parquet"))
```

# further processing
```{r}
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
# get winners and losers
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))
invasion_dc.df2 <- invasion_dc.df %>% 
  left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>% 
  mutate(
  total_winner = case_when(
    fitness> 0 ~ total1,
    fitness< 0 ~ total2
  ),
  total_loser = case_when(
    fitness > 0 ~ total2,
    fitness < 0 ~ total1
  )) %>% 
  filter(abs(fitness) > 0.5)
```

# plot
```{r}
invasion_dc.pl <- ggplot() +
  geom_path(data = invasion_dc.df2, aes(x= total_winner, y = R, group = id_alt, color = "Winner"), alpha = 0.5, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  geom_path(data = invasion_dc.df2, aes(x= total_loser, y = R, group = id_alt, color = "Loser"),
          alpha = 0.5,arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Status", x = "Asexual & sexual iRBC", y = "RBC") +
  scale_color_manual(values=c("Winner" = "#4575b4","Loser"= "#fc8d59"))  +
  theme_bw() + 
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) %>% 
  theme(legend.position = "none")
```


#--------- quantifying disease curve area ------------#
# function to calculate area between sets of points -> from https://stackoverflow.com/questions/3672260/area-covered-by-a-point-cloud-with-r
```{r}
library(splancs)
cha<-function(df){
  x <- df$total
  y <- df$R
chull(x,y)->i
return(areapl(cbind(x[i],y[i])))
}
```

# loop to get area: single infection
```{r}
# split df
si_dc_high.ls <- split(si_dc.high, si_dc.high$ez_label_si)
si_dc_poor.ls <- split(si_dc.poor, si_dc.poor$id)

# get area
si_dc_high.area <- cbind.data.frame(area = as.numeric(lapply(si_dc_high.ls, cha)), id_alt = names(lapply(si_dc_high.ls, cha)))
si_dc_poor.area <- cbind.data.frame(area = as.numeric(lapply(si_dc_poor.ls, cha)), id_alt = names(lapply(si_dc_poor.ls, cha)))


# join with fitness
si_fitness.df <- si_fitness.df %>% left_join(ez_label, by = c("id" = "id_si"))

si_dc_high.area2 <- si_dc_high.area %>% 
  left_join(si_fitness.df, by = c("id_alt" = "ez_label_si")) %>% 
  select(value, area) %>% 
  mutate(condition = "Single infection")

si_dc_poor.area2 <- si_dc_poor.area %>% 
  left_join(si_fitness.df, by = c("id_alt" = "id")) %>% 
  select(value, area) %>% 
  mutate(condition = "Single infection")
```

# coinfection
```{r}
# split
ci_dc_high.ls <- split(ci_dc.high2, ci_dc.high2$ez_label)
ci_dc_poor.ls <- split(ci_dc.poor, ci_dc.poor$label)

# run function to find area
ci_dc_high.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc_high.ls, cha)), id_alt = names(lapply(ci_dc_high.ls, cha)), value = unique(ci_dc.high2$value))

ci_dc_poor.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc_poor.ls, cha)), id_alt = names(lapply(ci_dc_poor.ls, cha)), value = unique(ci_dc.poor$value))

# edit and join
ci_dc_high.area2 <-  ci_dc_high.area %>% 
  select(area, value) %>% 
  mutate(condition = "Co-infection")

ci_dc_poor.area2 <-  ci_dc_poor.area %>% 
  select(area, value) %>% 
  mutate(condition = "Co-infection")
```

# dual cue
```{r}
# split
dual.dc <- read_parquet(here("data/disease_curve/dual_dc.parquet"))
dual_dc.ls <- split(dual.dc, dual.dc$label_alt)

# get area
dual_dc.area <- cbind.data.frame(area = as.numeric(lapply(dual_dc.ls, cha)), id_alt = names(lapply(dual_dc.ls, cha)))

# bind with fitness
dual_fitness.df <- dual_fitness.df %>% mutate(id_alt = paste(label, "+", label_b))
dual_dc.area2 <- dual_dc.area %>% 
  left_join(dual_fitness.df, by = "id_alt") %>% 
  select(area, value) %>% 
  mutate(condition = "Dual-cue") %>% 
  filter(value > 2)
dual_dc.area2
```

#------ get fitted scatter plot for all single infection, co infection, and dual cue --------#
```{r}
# rbind
si.area <- rbind(si_dc_high.area2, si_dc_poor.area2)
ci.area <- rbind(ci_dc_high.area2, ci_dc_poor.area2)
dual.area <- dual_dc.area2

# plot
library("ggpmisc")

si_area.pl <- ggplot(data = si.area, aes(x = area, y = value)) +
  geom_point() +
  stat_poly_line(color = "black") +
  stat_poly_eq() +
  labs(x = "Area", y = "Fitness", color = "Status") +
  theme_bw()

ci_area.pl <- ggplot(data = ci.area, aes(x = area, y = value)) +
  geom_point() +
  stat_poly_line(color = "black") +
  stat_poly_eq() +
  labs(x = "Area", y = "Fitness", color = "Status") +
  theme_bw()

dual_area.pl <- ggplot(data = dual.area, aes(x = area, y = value)) +
  geom_point() +
  stat_poly_line(color= "black") +
  stat_poly_eq() +
  labs(x = "Area", y = "Fitness", color = "Status") +
  theme_bw()
```


#------- plot together with disease curve --------#
```{r}
# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "v", widths = c(1, 0.45))

# co-infection
ci_vir.pl <- ggarrange(ci_dc.pl, ci_area.pl, align = "v", widths = c(1, 0.45))

# dual-cue
dual_vir.pl <- ggarrange(dual_dc.pl, dual_area.pl, align = "v", widths = c(1, 0.45))
```


#--------- static area comparison -------------#
# compute area
```{r}
# import in dc dynamic and fitness
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
static_fitness.df <- read.csv(here("data/ci_static.csv"))

# get winner and loser
static_dc.df4 <- static_dc.df %>% 
  left_join(select(static_fitness.df, id_1, id_2, fitness_difference), by = c("id_1", "id_2")) %>%
  filter(id_1 != id_2) %>% 
  mutate(
  total_winner = case_when(
    fitness_difference > 0 ~ total1,
    fitness_difference< 0 ~ total2
  ),
  total_loser = case_when(
    fitness_difference > 0 ~ total2,
    fitness_difference< 0 ~ total1
  ))%>% 
  filter(abs(fitness_difference) > 0.5)

# split by winner and loser
static_dc.ls1 <- split(select(static_dc.df4, R, total = total_winner), static_dc.df4$id_alt)
static_dc.ls2 <- split(select(static_dc.df4, R, total = total_loser), static_dc.df4$id_alt)

# get area
static_win.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls1, cha)), status = "Winner")
static_loser.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls2, cha)), status = "Loser")

# pair
static.area <- cbind(select(static_win.area, Winner = area),
                     select(static_loser.area, Loser = area)) %>% 
  mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
```

# plot static
```{r}
static_area.pl <- ggpaired(static.area, cond1 = "Winner", cond2 = "Loser", line.color = "classification", alpha = 0.2) +
  labs(x = "Status", y = "Area", color = "Comparison\n(Static)") +
  scale_color_manual(values = c("Loser larger area" = "#fc8d59", "Winner larger area" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = 0) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
```


#--------- invasion area comparison -----------------#
# get area
```{r}
# import in dc dynamic and fitness
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))

invasion_dc.df4 <- invasion_dc.df %>% 
  left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>% 
  mutate(
  total_winner = case_when(
    fitness> 0 ~ total1,
    fitness< 0 ~ total2
  ),
  total_loser = case_when(
    fitness > 0 ~ total2,
    fitness < 0 ~ total1
  )) %>% 
  filter(abs(fitness) > 0.5)

# split by winner and loser
invasion_dc.ls1 <- split(select(invasion_dc.df4, R, total = total_winner), invasion_dc.df4$id_alt)
invasion_dc.ls2 <- split(select(invasion_dc.df4, R, total = total_loser), invasion_dc.df4$id_alt)

# get area
invasion_win.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls1, cha)), status = "Winner")
invasion_loser.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls2, cha)), status = "Loser")

# pair
invasion.area <- cbind(select(invasion_win.area, Winner = area),
                     select(invasion_loser.area, Loser = area)) %>% 
  mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
```

# plot
```{r}
invasion_area.pl <-ggpaired(invasion.area, cond1 = "Winner", cond2 = "Loser", line.color = "classification", alpha = 0.2) +
  labs(x = "Status", y = "Area", color = "Comparison\n(Invasive)") +
  scale_color_manual(values = c("Loser larger area" = "#fc8d59", "Winner larger area" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = 0) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
  
```

#------ plot together -------------#
```{r}
# pairwise comparison for static and invasive comeptition
heterocue_comp.pl <- ggarrange(static_area.pl, invasion_area.pl, ncol = 2, nrow = 1, align = "v")

# join inthe other disease curves
ggarrange(si_vir.pl, ci_vir.pl, dual_vir.pl, heterocue_comp.pl, ncol = 2, nrow = 2, labels = c("A", "B", "C", "D"), heights = c(1, 0.8))

ggsave(here("figures/plos-bio/virulence.tiff"), units = "px", width = 2250, height = 1600, scale = 2, dpi=300, bg = "white")
```


#====================================#
# dual cue dynamics figure
#===================================#

# get dual dynamics
```{r}
dual.dyn <- chabaudi_si_clean(
  parameters_cr = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 30, by = 1e-3),
  cue_range =  seq(6, 7, by = 1/500),
  cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
  cue = "R",
  cue_b = "I",
  log_cue = "log10",
  log_cue_b = "log10",
  solver = "vode",
  dyn = T
)

# filter out relevent dataframes
dual.dyn_f <- dual.dyn %>% 
  filter(variable %in% c("I", "Ig", "G", "R", "N", "W"))

# cr only
dual.dyn_cr <- dual.dyn %>% filter(variable == "cr")
```

# plot
```{r}
dual_I.plt <- ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "I"), aes(x = time, y = value/(10^5)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "I" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^5, name="Asexual iRBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_Ig.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "Ig"), aes(x = time, y = value/(10^5)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "Ig" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^5, name="Sexual iRBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_G.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "G"), aes(x = time, y = value/(10^4)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "G" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^4)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^4, name="Gametocyte per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_R.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "R"), aes(x = time, y = value/(10^7)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "R" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^7)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^7, name="RBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_N.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "N"), aes(x = time, y = value*10),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "N" & row_number() %% 1000 == 0), 
             aes(x = time, y = value*10), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*0.1, name="Indiscriminate immunity")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_W.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "W"), aes(x = time, y = value*2),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "W" & row_number() %% 1000 == 0), 
             aes(x = time, y = value*2), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*0.5, name="Targeted immunity")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")
```

# plot together
```{r}
ggarrange(dual_I.plt, dual_Ig.plt, dual_G.plt, dual_R.plt, dual_N.plt, dual_W.plt, nrow = 3, ncol = 2, align = "hv", labels = c("A", "B", "C", "D", "E", "F"))
ggsave(here("figures/plos-bio/dual_dyn.tiff"), units = "px", width = 2250, height = 2000, scale = 1, dpi=300, bg = "white")
```

#======================================#
# Single and co-infection verification
#======================================#
# single infection
```{r}
# import in all single infection data
si_val.ls <- list.files(path = here("data/si_validation"), pattern = "*.csv", full.names = T)

si_val.df <- lapply(si_val.ls, read.csv)
si_val.df <- do.call(rbind, si_val.df)

# get max fitness from simulation. left join with si_opt
si_opt.df <- read.csv(here("data/si_opt.csv"))

# we can see that all of the randomly simulated models have a fitness value that is less than the optimized model
si_val.df2 <- select(si_val.df, V1, id) %>%
  left_join(si_opt.df, by =c("id" = "id")) %>% 
  mutate(fitness_difference = fitness_20 - V1) %>% 
  left_join(select(ez_label, id_si, ez_label_si), by = c("id" = "id_si"))
```

### Model validation
```{r}
# read in the results file
ci_val.ls <- list.files(path = here("data/ci_validation"), pattern = "*.csv", full.names = T)
ci_val.ls2 <- lapply(ci_val.ls, read.csv)
ci_val.df <- do.call(rbind, ci_val.ls2)

ci_val.df2
ci_val.df2 <- ci_val.df %>% 
  left_join(select(ez_label, label_ci, ez_label), by = c("label" = "label_ci"))
```

# plot
```{r}
si_val.plt <- ggplot(data = si_val.df2, aes(x = fitness_difference)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0, color = "#fc8d59") +
  facet_wrap(~ez_label_si, scales = "free", ncol = 3) +
  labs(x = "Optimized fitness - random fitness", y = "Frequency") +
  theme_bw()

ci_val.plt <- ggplot(data = ci_val.df2, aes(x = V1)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0, color = "#fc8d59") +
  facet_wrap(~ez_label, scales = "free", ncol = 4) +
  labs(x = "Fitness difference between\noptimized and random strain", y = "Frequency") +
  theme_bw()

ggarrange(si_val.plt, ci_val.plt, align = "hv", labels = c("A", "B"), widths = c(3,4))
ggsave(here("figures/plos-bio/validation.tiff"), units = "px", width = 2250, height = 1300, scale = 1.6, dpi=300, bg = "white")
```

#=========================#
# Monte carlo dynamics supplementary
#=========================#
# run code in report 16
```{r}
mc_dyn_a <- ggplot() +
  geom_line(data = reference.df, aes(x = time, y = cr)) +
  geom_ribbon(data = diff.df, aes(x = time, ymin = cr_bot, ymax = cr_top), alpha = 0.5, fill = "#fc8d59") +
  facet_wrap(~cue, ncol = 2) +
  labs(x = "Time (days)", y = "Conversion rate") +
  theme_bw()
  
# plot fitness timeseries. When if tiness lost? At the latter part
mc_dyn_b  <- ggplot() +
  geom_line(data = reference.df, aes(x = time, y = tau)) +
  geom_ribbon(data = diff.df, aes(x = time, ymin = tau_bot, ymax = tau_top), alpha = 0.5, fill = "#fc8d59") +
  facet_wrap(~cue, ncol = 2) +
  labs(x = "Time (days)", y = "Transmission potential") +
  theme_bw()

ggarrange(mc_dyn_a, mc_dyn_b, ncol = 1, align = "v", labels = c("A", "B"))
ggsave(here("figures/plos-bio/MC_dyn.tiff"), units = "px", width = 2000, height = 2600, scale = 1, dpi=300, bg = "white")
```



